Windows Driver: correctly save and restore extended processor state when performing AVX operations on Windows 7 and later. Enhance readability of code handling save/restore of floating point state.

This commit is contained in:
Mounir IDRASSI 2017-07-04 02:05:11 +02:00
parent c2f6190627
commit 89efcdb8cd
No known key found for this signature in database
GPG Key ID: DD0C382D5FCFB8FC
8 changed files with 201 additions and 39 deletions

View File

@ -99,6 +99,18 @@ void hmac_sha256
char* buf = hmac.k;
int b;
char key[SHA256_DIGESTSIZE];
#if defined (DEVICE_DRIVER)
NTSTATUS saveStatus = STATUS_INVALID_PARAMETER;
#ifdef _WIN64
XSTATE_SAVE SaveState;
if (g_isIntel && HasSAVX())
saveStatus = KeSaveExtendedProcessorState(XSTATE_MASK_GSSE, &SaveState);
#else
KFLOATING_SAVE floatingPointState;
if (HasSSE2())
saveStatus = KeSaveFloatingPointState (&floatingPointState);
#endif
#endif
/* If the key is longer than the hash algorithm block size,
let key = sha256(key), as per HMAC specifications. */
if (lk > SHA256_BLOCKSIZE)
@ -139,6 +151,16 @@ void hmac_sha256
sha256_hash ((unsigned char *) buf, SHA256_BLOCKSIZE, ctx);
hmac_sha256_internal(d, ld, &hmac);
#if defined (DEVICE_DRIVER)
if (NT_SUCCESS (saveStatus))
#ifdef _WIN64
KeRestoreExtendedProcessorState(&SaveState);
#else
KeRestoreFloatingPointState (&floatingPointState);
#endif
#endif
/* Prevent leaks */
burn(&hmac, sizeof(hmac));
burn(key, sizeof(key));
@ -204,6 +226,18 @@ void derive_key_sha256 (char *pwd, int pwd_len, char *salt, int salt_len, uint32
int b, l, r;
#ifndef TC_WINDOWS_BOOT
char key[SHA256_DIGESTSIZE];
#if defined (DEVICE_DRIVER)
NTSTATUS saveStatus = STATUS_INVALID_PARAMETER;
#ifdef _WIN64
XSTATE_SAVE SaveState;
if (g_isIntel && HasSAVX())
saveStatus = KeSaveExtendedProcessorState(XSTATE_MASK_GSSE, &SaveState);
#else
KFLOATING_SAVE floatingPointState;
if (HasSSE2())
saveStatus = KeSaveFloatingPointState (&floatingPointState);
#endif
#endif
/* If the password is longer than the hash algorithm block size,
let pwd = sha256(pwd), as per HMAC specifications. */
if (pwd_len > SHA256_BLOCKSIZE)
@ -267,6 +301,14 @@ void derive_key_sha256 (char *pwd, int pwd_len, char *salt, int salt_len, uint32
derive_u_sha256 (salt, salt_len, iterations, b, &hmac);
memcpy (dk, hmac.u, r);
#if defined (DEVICE_DRIVER)
if (NT_SUCCESS (saveStatus))
#ifdef _WIN64
KeRestoreExtendedProcessorState(&SaveState);
#else
KeRestoreFloatingPointState (&floatingPointState);
#endif
#endif
/* Prevent possible leaks. */
burn (&hmac, sizeof(hmac));
@ -327,11 +369,17 @@ void hmac_sha512
char* buf = hmac.k;
int b;
char key[SHA512_DIGESTSIZE];
#if defined (DEVICE_DRIVER) && !defined (_WIN64)
KFLOATING_SAVE floatingPointState;
NTSTATUS saveStatus = STATUS_SUCCESS;
if (HasSSE2() && HasMMX())
#if defined (DEVICE_DRIVER)
NTSTATUS saveStatus = STATUS_INVALID_PARAMETER;
#ifdef _WIN64
XSTATE_SAVE SaveState;
if (g_isIntel && HasSAVX())
saveStatus = KeSaveExtendedProcessorState(XSTATE_MASK_GSSE, &SaveState);
#else
KFLOATING_SAVE floatingPointState;
if (HasSSSE3() && HasMMX())
saveStatus = KeSaveFloatingPointState (&floatingPointState);
#endif
#endif
/* If the key is longer than the hash algorithm block size,
@ -375,9 +423,13 @@ void hmac_sha512
hmac_sha512_internal (d, ld, &hmac);
#if defined (DEVICE_DRIVER) && !defined (_WIN64)
if (NT_SUCCESS (saveStatus) && (HasSSE2() && HasMMX()))
#if defined (DEVICE_DRIVER)
if (NT_SUCCESS (saveStatus))
#ifdef _WIN64
KeRestoreExtendedProcessorState(&SaveState);
#else
KeRestoreFloatingPointState (&floatingPointState);
#endif
#endif
/* Prevent leaks */
@ -419,11 +471,17 @@ void derive_key_sha512 (char *pwd, int pwd_len, char *salt, int salt_len, uint32
char* buf = hmac.k;
int b, l, r;
char key[SHA512_DIGESTSIZE];
#if defined (DEVICE_DRIVER) && !defined (_WIN64)
KFLOATING_SAVE floatingPointState;
NTSTATUS saveStatus = STATUS_SUCCESS;
if (HasSSE2() && HasMMX())
#if defined (DEVICE_DRIVER)
NTSTATUS saveStatus = STATUS_INVALID_PARAMETER;
#ifdef _WIN64
XSTATE_SAVE SaveState;
if (g_isIntel && HasSAVX())
saveStatus = KeSaveExtendedProcessorState(XSTATE_MASK_GSSE, &SaveState);
#else
KFLOATING_SAVE floatingPointState;
if (HasSSSE3() && HasMMX())
saveStatus = KeSaveFloatingPointState (&floatingPointState);
#endif
#endif
/* If the password is longer than the hash algorithm block size,
@ -488,9 +546,13 @@ void derive_key_sha512 (char *pwd, int pwd_len, char *salt, int salt_len, uint32
derive_u_sha512 (salt, salt_len, iterations, b, &hmac);
memcpy (dk, hmac.u, r);
#if defined (DEVICE_DRIVER) && !defined (_WIN64)
if (NT_SUCCESS (saveStatus) && (HasSSE2() && HasMMX()))
#if defined (DEVICE_DRIVER)
if (NT_SUCCESS (saveStatus))
#ifdef _WIN64
KeRestoreExtendedProcessorState(&SaveState);
#else
KeRestoreFloatingPointState (&floatingPointState);
#endif
#endif
/* Prevent possible leaks. */
@ -771,7 +833,7 @@ void hmac_whirlpool
char key[WHIRLPOOL_DIGESTSIZE];
#if defined (DEVICE_DRIVER) && !defined (_WIN64)
KFLOATING_SAVE floatingPointState;
NTSTATUS saveStatus = STATUS_SUCCESS;
NTSTATUS saveStatus = STATUS_INVALID_PARAMETER;
if (HasISSE())
saveStatus = KeSaveFloatingPointState (&floatingPointState);
#endif
@ -817,7 +879,7 @@ void hmac_whirlpool
hmac_whirlpool_internal(d, ld, &hmac);
#if defined (DEVICE_DRIVER) && !defined (_WIN64)
if (NT_SUCCESS (saveStatus) && HasISSE())
if (NT_SUCCESS (saveStatus))
KeRestoreFloatingPointState (&floatingPointState);
#endif
/* Prevent leaks */
@ -859,7 +921,7 @@ void derive_key_whirlpool (char *pwd, int pwd_len, char *salt, int salt_len, uin
int b, l, r;
#if defined (DEVICE_DRIVER) && !defined (_WIN64)
KFLOATING_SAVE floatingPointState;
NTSTATUS saveStatus = STATUS_SUCCESS;
NTSTATUS saveStatus = STATUS_INVALID_PARAMETER;
if (HasISSE())
saveStatus = KeSaveFloatingPointState (&floatingPointState);
#endif
@ -926,7 +988,7 @@ void derive_key_whirlpool (char *pwd, int pwd_len, char *salt, int salt_len, uin
memcpy (dk, hmac.u, r);
#if defined (DEVICE_DRIVER) && !defined (_WIN64)
if (NT_SUCCESS (saveStatus) && HasISSE())
if (NT_SUCCESS (saveStatus))
KeRestoreFloatingPointState (&floatingPointState);
#endif
@ -986,7 +1048,7 @@ void hmac_streebog
CRYPTOPP_ALIGN_DATA(16) char key[STREEBOG_DIGESTSIZE];
#if defined (DEVICE_DRIVER) && !defined (_WIN64)
KFLOATING_SAVE floatingPointState;
NTSTATUS saveStatus = STATUS_SUCCESS;
NTSTATUS saveStatus = STATUS_INVALID_PARAMETER;
if (HasSSE2() || HasSSE41())
saveStatus = KeSaveFloatingPointState (&floatingPointState);
#endif
@ -1032,7 +1094,7 @@ void hmac_streebog
hmac_streebog_internal(d, ld, &hmac);
#if defined (DEVICE_DRIVER) && !defined (_WIN64)
if (NT_SUCCESS (saveStatus) && (HasSSE2() || HasSSE41()))
if (NT_SUCCESS (saveStatus))
KeRestoreFloatingPointState (&floatingPointState);
#endif
/* Prevent leaks */
@ -1074,7 +1136,7 @@ void derive_key_streebog (char *pwd, int pwd_len, char *salt, int salt_len, uint
int b, l, r;
#if defined (DEVICE_DRIVER) && !defined (_WIN64)
KFLOATING_SAVE floatingPointState;
NTSTATUS saveStatus = STATUS_SUCCESS;
NTSTATUS saveStatus = STATUS_INVALID_PARAMETER;
if (HasSSE2() || HasSSE41())
saveStatus = KeSaveFloatingPointState (&floatingPointState);
#endif
@ -1141,7 +1203,7 @@ void derive_key_streebog (char *pwd, int pwd_len, char *salt, int salt_len, uint
memcpy (dk, hmac.u, r);
#if defined (DEVICE_DRIVER) && !defined (_WIN64)
if (NT_SUCCESS (saveStatus) && (HasSSE2() || HasSSE41()))
if (NT_SUCCESS (saveStatus))
KeRestoreFloatingPointState (&floatingPointState);
#endif

View File

@ -260,6 +260,26 @@ typedef int BOOL;
#define FALSE !TRUE
#endif
/* Function-pointer type with the same signature as KeSaveExtendedProcessorState:
 * takes a 64-bit feature mask and a pointer to an XSTATE_SAVE buffer, returns NTSTATUS. */
typedef NTSTATUS (NTAPI *KeSaveExtendedProcessorStateFn) (
__in ULONG64 Mask,
PXSTATE_SAVE XStateSave
);
/* Function-pointer type with the same signature as KeRestoreExtendedProcessorState:
 * takes the XSTATE_SAVE buffer and returns nothing. */
typedef VOID (NTAPI *KeRestoreExtendedProcessorStateFn) (
PXSTATE_SAVE XStateSave
);
/* Save routine declared here for driver code; Mask selects the processor state
 * components to save and XStateSave receives the saved state. */
extern NTSTATUS NTAPI KeSaveExtendedProcessorState (
__in ULONG64 Mask,
PXSTATE_SAVE XStateSave
);
/* Restore routine declared here for driver code; XStateSave is the buffer that
 * was passed to the matching save call. */
extern VOID NTAPI KeRestoreExtendedProcessorState (
PXSTATE_SAVE XStateSave
);
#else /* !TC_WINDOWS_DRIVER */
#if !defined(_UEFI)
#define TCalloc malloc

View File

@ -583,8 +583,8 @@ BOOL RunHashTest (HashFunction fn, HashTestVector* vector, BOOL bUseSSE)
BOOL bRet = TRUE;
#if defined (DEVICE_DRIVER) && !defined (_WIN64)
KFLOATING_SAVE floatingPointState;
NTSTATUS saveStatus = STATUS_SUCCESS;
if (bUseSSE && (HasISSE() || HasSSE2()))
NTSTATUS saveStatus = STATUS_INVALID_PARAMETER;
if (bUseSSE)
saveStatus = KeSaveFloatingPointState (&floatingPointState);
#endif
while (vector[i].hexInput && vector[i].hexOutput)
@ -601,7 +601,7 @@ BOOL RunHashTest (HashFunction fn, HashTestVector* vector, BOOL bUseSSE)
}
#if defined (DEVICE_DRIVER) && !defined (_WIN64)
if (NT_SUCCESS (saveStatus) && bUseSSE && (HasISSE() || HasSSE2()))
if (NT_SUCCESS (saveStatus))
KeRestoreFloatingPointState (&floatingPointState);
#endif
@ -1508,7 +1508,7 @@ BOOL test_pkcs5 ()
return FALSE;
/* STREEBOG hash tests */
if (RunHashTest (StreebogHash, Streebog512TestVectors, TRUE) == FALSE)
if (RunHashTest (StreebogHash, Streebog512TestVectors, (HasSSE2() || HasSSE41())? TRUE : FALSE) == FALSE)
return FALSE;
/* PKCS-5 test 1 with HMAC-SHA-256 used as the PRF (https://tools.ietf.org/html/draft-josefsson-scrypt-kdf-00) */

View File

@ -1096,15 +1096,24 @@ void camellia_decrypt(const unsigned __int8 *inBlock, unsigned __int8 *outBlock
void camellia_encrypt_blocks(unsigned __int8 *instance, const byte* in_blk, byte* out_blk, uint32 blockCount)
{
#if !defined (_UEFI)
if (IsCpuIntel() && IsAesHwCpuSupported () && HasSAVX()) /* on AMD cpu, AVX is too slow */
if ((blockCount >= 16) && IsCpuIntel() && IsAesHwCpuSupported () && HasSAVX()) /* on AMD cpu, AVX is too slow */
{
while (blockCount >= 16)
#if defined (TC_WINDOWS_DRIVER)
XSTATE_SAVE SaveState;
if (NT_SUCCESS (KeSaveExtendedProcessorState(XSTATE_MASK_GSSE, &SaveState)))
{
camellia_ecb_enc_16way (instance, out_blk, in_blk);
out_blk += 16 * 16;
in_blk += 16 * 16;
blockCount -= 16;
#endif
while (blockCount >= 16)
{
camellia_ecb_enc_16way (instance, out_blk, in_blk);
out_blk += 16 * 16;
in_blk += 16 * 16;
blockCount -= 16;
}
#if defined (TC_WINDOWS_DRIVER)
KeRestoreExtendedProcessorState(&SaveState);
}
#endif
}
#endif
@ -1123,8 +1132,13 @@ void camellia_encrypt_blocks(unsigned __int8 *instance, const byte* in_blk, byte
void camellia_decrypt_blocks(unsigned __int8 *instance, const byte* in_blk, byte* out_blk, uint32 blockCount)
{
#if !defined (_UEFI)
if (IsCpuIntel() && IsAesHwCpuSupported () && HasSAVX()) /* on AMD cpu, AVX is too slow */
if ((blockCount >= 16) && IsCpuIntel() && IsAesHwCpuSupported () && HasSAVX()) /* on AMD cpu, AVX is too slow */
{
#if defined (TC_WINDOWS_DRIVER)
XSTATE_SAVE SaveState;
if (NT_SUCCESS (KeSaveExtendedProcessorState(XSTATE_MASK_GSSE, &SaveState)))
{
#endif
while (blockCount >= 16)
{
camellia_ecb_dec_16way (instance, out_blk, in_blk);
@ -1132,6 +1146,10 @@ void camellia_decrypt_blocks(unsigned __int8 *instance, const byte* in_blk, byte
in_blk += 16 * 16;
blockCount -= 16;
}
#if defined (TC_WINDOWS_DRIVER)
KeRestoreExtendedProcessorState(&SaveState);
}
#endif
}
#endif

View File

@ -96,7 +96,7 @@ void gost_set_key(const byte *key, gost_kds *ks, int useDynamicSbox)
byte sbox_seed[64];
#if defined (DEVICE_DRIVER) && !defined (_WIN64)
KFLOATING_SAVE floatingPointState;
NTSTATUS saveStatus = STATUS_SUCCESS;
NTSTATUS saveStatus = STATUS_INVALID_PARAMETER;
if (HasSSE2() || HasSSE41())
saveStatus = KeSaveFloatingPointState (&floatingPointState);
#endif
@ -106,7 +106,7 @@ void gost_set_key(const byte *key, gost_kds *ks, int useDynamicSbox)
STREEBOG_finalize(&sctx, sbox_seed);
#if defined (DEVICE_DRIVER) && !defined (_WIN64)
if (NT_SUCCESS (saveStatus) && (HasSSE2() || HasSSE41()))
if (NT_SUCCESS (saveStatus))
KeRestoreFloatingPointState (&floatingPointState);
#endif

View File

@ -327,10 +327,14 @@ static void ComputeBootLoaderFingerprint(PDEVICE_OBJECT LowerDeviceObject, byte*
status = TCReadDevice (LowerDeviceObject, ioBuffer, offset, TC_SECTOR_SIZE_BIOS);
if (NT_SUCCESS (status))
{
#if !defined (_WIN64)
KFLOATING_SAVE floatingPointState;
NTSTATUS saveStatus = STATUS_SUCCESS;
if (HasISSE()|| (HasSSE2() && HasMMX()))
NTSTATUS saveStatus = STATUS_INVALID_PARAMETER;
#ifdef _WIN64
XSTATE_SAVE SaveState;
if (g_isIntel && HasSAVX())
saveStatus = KeSaveExtendedProcessorState(XSTATE_MASK_GSSE, &SaveState);
#else
KFLOATING_SAVE floatingPointState;
if (HasISSE() || (HasSSSE3() && HasMMX()))
saveStatus = KeSaveFloatingPointState (&floatingPointState);
#endif
WHIRLPOOL_add (ioBuffer, TC_BOOT_SECTOR_PIM_VALUE_OFFSET, &whirlpool);
@ -367,8 +371,10 @@ static void ComputeBootLoaderFingerprint(PDEVICE_OBJECT LowerDeviceObject, byte*
sha512_end (&BootLoaderFingerprint [WHIRLPOOL_DIGESTSIZE], &sha2);
}
#if !defined (_WIN64)
if (NT_SUCCESS (saveStatus) && (HasISSE() || (HasSSE2() && HasMMX())))
if (NT_SUCCESS (saveStatus))
#ifdef _WIN64
KeRestoreExtendedProcessorState(&SaveState);
#else
KeRestoreFloatingPointState (&floatingPointState);
#endif
}

View File

@ -14,6 +14,7 @@
#include "DriveFilter.h"
#include "Ntdriver.h"
#include "Tests.h"
#include "cpu.h"
static DriveFilterExtension *BootDriveFilterExtension = NULL;
static LARGE_INTEGER DumpPartitionOffset;
@ -63,7 +64,21 @@ NTSTATUS DumpFilterEntry (PFILTER_EXTENSION filterExtension, PFILTER_INITIALIZAT
// KeSaveFloatingPointState() may generate a bug check during crash dump
#if !defined (_WIN64)
if (filterExtension->DumpType == DumpTypeCrashdump)
{
dumpConfig.HwEncryptionEnabled = FALSE;
// also disable SSE optimizations
HasMMX() = 0;
HasISSE() = 0;
HasSSE2() = 0;
HasSSSE3() = 0;
HasSSE41() = 0;
HasSSE42() = 0;
HasAESNI() = 0;
HasCLMUL() = 0;
HasSAVX() = 0;
HasSAVX2() = 0;
HasSBMI2() = 0;
}
#endif
EnableHwEncryption (dumpConfig.HwEncryptionEnabled);

View File

@ -73,6 +73,11 @@
#pragma alloc_text(INIT,DriverEntry)
#pragma alloc_text(INIT,TCCreateRootDeviceObject)
/* We need to silence 'type cast' warning in order to use MmGetSystemRoutineAddress.
* MmGetSystemRoutineAddress() should have been declared FARPROC instead of PVOID.
*/
#pragma warning(disable:4055)
PDRIVER_OBJECT TCDriverObject;
PDEVICE_OBJECT RootDeviceObject = NULL;
static KMUTEX RootDeviceControlMutex;
@ -91,6 +96,8 @@ static size_t EncryptionThreadPoolFreeCpuCountLimit = 0;
static BOOL SystemFavoriteVolumeDirty = FALSE;
static BOOL PagingFileCreationPrevented = FALSE;
static BOOL EnableExtendedIoctlSupport = FALSE;
static KeSaveExtendedProcessorStateFn KeSaveExtendedProcessorStatePtr = NULL;
static KeRestoreExtendedProcessorStateFn KeRestoreExtendedProcessorStatePtr = NULL;
POOL_TYPE ExDefaultNonPagedPoolType = NonPagedPool;
ULONG ExDefaultMdlProtection = 0;
@ -119,6 +126,15 @@ NTSTATUS DriverEntry (PDRIVER_OBJECT DriverObject, PUNICODE_STRING RegistryPath)
ExDefaultMdlProtection = MdlMappingNoExecute;
}
// KeSaveExtendedProcessorState/KeRestoreExtendedProcessorState are available starting from Windows 7.
// They are resolved dynamically so the driver does not import them statically.
if ((OsMajorVersion > 6) || (OsMajorVersion == 6 && OsMinorVersion >= 1))
{
	UNICODE_STRING saveFuncName;
	UNICODE_STRING restoreFuncName;

	// Each routine must be looked up under its own name: resolving both pointers
	// with the "save" name would make every restore call invoke the save routine.
	RtlInitUnicodeString(&saveFuncName, L"KeSaveExtendedProcessorState");
	RtlInitUnicodeString(&restoreFuncName, L"KeRestoreExtendedProcessorState");
	KeSaveExtendedProcessorStatePtr = (KeSaveExtendedProcessorStateFn) MmGetSystemRoutineAddress(&saveFuncName);
	KeRestoreExtendedProcessorStatePtr = (KeRestoreExtendedProcessorStateFn) MmGetSystemRoutineAddress(&restoreFuncName);

	// Use the two routines only as a pair: a save that can never be restored
	// (or the reverse) is worse than not saving at all.
	if (!KeSaveExtendedProcessorStatePtr || !KeRestoreExtendedProcessorStatePtr)
	{
		KeSaveExtendedProcessorStatePtr = NULL;
		KeRestoreExtendedProcessorStatePtr = NULL;
	}
}
// Load dump filter if the main driver is already loaded
if (NT_SUCCESS (TCDeviceIoControl (NT_ROOT_PREFIX, TC_IOCTL_GET_DRIVER_VERSION, NULL, 0, &version, sizeof (version))))
return DumpFilterEntry ((PFILTER_EXTENSION) DriverObject, (PFILTER_INITIALIZATION_DATA) RegistryPath);
@ -3960,3 +3976,28 @@ BOOL IsOSAtLeast (OSVersionEnum reqMinOS)
return ((OsMajorVersion << 16 | OsMinorVersion << 8)
>= (major << 16 | minor << 8));
}
/*
 * Wrapper around the dynamically resolved kernel routine of the same name.
 *
 * Mask       - passed through unchanged to the resolved routine.
 * XStateSave - passed through unchanged to the resolved routine.
 *
 * Returns the status of the resolved routine when KeSaveExtendedProcessorStatePtr
 * is set. When it is NULL nothing is saved and STATUS_SUCCESS is returned.
 */
NTSTATUS NTAPI KeSaveExtendedProcessorState (
__in ULONG64 Mask,
PXSTATE_SAVE XStateSave
)
{
	/* Routine not resolved: report success without touching XStateSave. */
	if (!KeSaveExtendedProcessorStatePtr)
		return STATUS_SUCCESS;

	return KeSaveExtendedProcessorStatePtr (Mask, XStateSave);
}
/*
 * Wrapper around the dynamically resolved kernel routine of the same name.
 *
 * XStateSave - passed through unchanged to the resolved routine.
 *
 * Does nothing when KeRestoreExtendedProcessorStatePtr is NULL.
 */
VOID NTAPI KeRestoreExtendedProcessorState (
PXSTATE_SAVE XStateSave
)
{
	/* Routine not resolved: nothing to restore. */
	if (KeRestoreExtendedProcessorStatePtr == NULL)
		return;

	KeRestoreExtendedProcessorStatePtr (XStateSave);
}