Windows: Don't use API for Processor Groups support if there is only 1 CPU group in the system. This can fix slowness issue observed on some PCs with AMD CPUs.

This commit is contained in:
Mounir IDRASSI 2020-07-01 17:11:07 +02:00
parent 5f04e5abd0
commit 3f587a3629
No known key found for this signature in database
GPG Key ID: 02C30AE90FAE4A6F
4 changed files with 86 additions and 41 deletions

View File

@ -20,6 +20,12 @@
#define TC_ENC_THREAD_POOL_MAX_THREAD_COUNT 256 //64
#define TC_ENC_THREAD_POOL_QUEUE_SIZE (TC_ENC_THREAD_POOL_MAX_THREAD_COUNT * 2)
#define TC_ENC_THREAD_POOL_LEGACY_MAX_THREAD_COUNT 64
#define TC_ENC_THREAD_POOL_LEGACY_QUEUE_SIZE (TC_ENC_THREAD_POOL_LEGACY_MAX_THREAD_COUNT * 2)
static volatile size_t ThreadPoolCount = TC_ENC_THREAD_POOL_LEGACY_MAX_THREAD_COUNT;
static volatile int ThreadQueueSize = TC_ENC_THREAD_POOL_LEGACY_QUEUE_SIZE;
#ifdef DEVICE_DRIVER
#define TC_THREAD_HANDLE PKTHREAD
@ -178,19 +184,22 @@ static void SetWorkItemState (EncryptionThreadPoolWorkItem *workItem, WorkItemSt
static TC_THREAD_PROC EncryptionThreadProc (void *threadArg)
{
EncryptionThreadPoolWorkItem *workItem;
#ifdef DEVICE_DRIVER
SetThreadCpuGroupAffinity ((USHORT) *(WORD*)(threadArg));
#else
SetThreadGroupAffinityFn SetThreadGroupAffinityPtr = (SetThreadGroupAffinityFn) GetProcAddress (GetModuleHandle (L"kernel32.dll"), "SetThreadGroupAffinity");
if (SetThreadGroupAffinityPtr && threadArg)
if (threadArg)
{
GROUP_AFFINITY groupAffinity = {0};
groupAffinity.Mask = ~0ULL;
groupAffinity.Group = *(WORD*)(threadArg);
SetThreadGroupAffinityPtr(GetCurrentThread(), &groupAffinity, NULL);
}
#ifdef DEVICE_DRIVER
SetThreadCpuGroupAffinity ((USHORT) *(WORD*)(threadArg));
#else
SetThreadGroupAffinityFn SetThreadGroupAffinityPtr = (SetThreadGroupAffinityFn) GetProcAddress (GetModuleHandle (L"kernel32.dll"), "SetThreadGroupAffinity");
if (SetThreadGroupAffinityPtr && threadArg)
{
GROUP_AFFINITY groupAffinity = {0};
groupAffinity.Mask = ~0ULL;
groupAffinity.Group = *(WORD*)(threadArg);
SetThreadGroupAffinityPtr(GetCurrentThread(), &groupAffinity, NULL);
}
#endif
}
while (!StopPending)
@ -199,7 +208,7 @@ static TC_THREAD_PROC EncryptionThreadProc (void *threadArg)
workItem = &WorkItemQueue[DequeuePosition++];
if (DequeuePosition >= TC_ENC_THREAD_POOL_QUEUE_SIZE)
if (DequeuePosition >= ThreadQueueSize)
DequeuePosition = 0;
while (!StopPending && GetWorkItemState (workItem) != WorkItemReady)
@ -288,13 +297,11 @@ static TC_THREAD_PROC EncryptionThreadProc (void *threadArg)
#endif
}
#ifndef DEVICE_DRIVER
BOOL EncryptionThreadPoolStart (size_t encryptionFreeCpuCount)
size_t GetCpuCount (WORD* pGroupCount)
{
size_t cpuCount = 0, i = 0;
#ifdef DEVICE_DRIVER
cpuCount = GetCpuCount();
#else
size_t cpuCount = 0;
SYSTEM_INFO sysInfo;
GetActiveProcessorGroupCountFn GetActiveProcessorGroupCountPtr = (GetActiveProcessorGroupCountFn) GetProcAddress (GetModuleHandle (L"Kernel32.dll"), "GetActiveProcessorGroupCount");
GetActiveProcessorCountFn GetActiveProcessorCountPtr = (GetActiveProcessorCountFn) GetProcAddress (GetModuleHandle (L"Kernel32.dll"), "GetActiveProcessorCount");
@ -307,25 +314,47 @@ BOOL EncryptionThreadPoolStart (size_t encryptionFreeCpuCount)
totalProcessors += (size_t) GetActiveProcessorCountPtr(j);
}
cpuCount = totalProcessors;
if (pGroupCount)
*pGroupCount = groupCount;
}
else
{
GetSystemInfo(&sysInfo);
cpuCount = sysInfo.dwNumberOfProcessors;
cpuCount = (size_t) sysInfo.dwNumberOfProcessors;
if (pGroupCount)
*pGroupCount = 1;
}
return cpuCount;
}
#endif
BOOL EncryptionThreadPoolStart (size_t encryptionFreeCpuCount)
{
size_t cpuCount = 0, i = 0;
WORD groupCount = 1;
cpuCount = GetCpuCount(&groupCount);
if (ThreadPoolRunning)
return TRUE;
if (groupCount > 1)
{
ThreadPoolCount = TC_ENC_THREAD_POOL_MAX_THREAD_COUNT;
ThreadQueueSize = TC_ENC_THREAD_POOL_QUEUE_SIZE;
}
if (cpuCount > encryptionFreeCpuCount)
cpuCount -= encryptionFreeCpuCount;
if (cpuCount < 2)
return TRUE;
if (cpuCount > TC_ENC_THREAD_POOL_MAX_THREAD_COUNT)
cpuCount = TC_ENC_THREAD_POOL_MAX_THREAD_COUNT;
if (cpuCount > ThreadPoolCount)
cpuCount = ThreadPoolCount;
StopPending = FALSE;
DequeuePosition = 0;
@ -377,32 +406,38 @@ BOOL EncryptionThreadPoolStart (size_t encryptionFreeCpuCount)
for (ThreadCount = 0; ThreadCount < cpuCount; ++ThreadCount)
{
#ifdef DEVICE_DRIVER
ThreadProcessorGroups[ThreadCount] = GetCpuGroup ((size_t) ThreadCount);
#else
// Determine which processor group to bind the thread to.
if (GetActiveProcessorGroupCountPtr && GetActiveProcessorCountPtr)
WORD* pThreadArg = NULL;
if (groupCount > 1)
{
WORD j, groupCount = GetActiveProcessorGroupCountPtr();
uint32 totalProcessors = 0U;
for (j = 0U; j < groupCount; j++)
#ifdef DEVICE_DRIVER
ThreadProcessorGroups[ThreadCount] = GetCpuGroup ((size_t) ThreadCount);
#else
GetActiveProcessorCountFn GetActiveProcessorCountPtr = (GetActiveProcessorCountFn) GetProcAddress (GetModuleHandle (L"Kernel32.dll"), "GetActiveProcessorCount");
// Determine which processor group to bind the thread to.
if (GetActiveProcessorCountPtr)
{
totalProcessors += (uint32) GetActiveProcessorCountPtr(j);
if (totalProcessors >= ThreadCount)
WORD j;
uint32 totalProcessors = 0U;
for (j = 0U; j < groupCount; j++)
{
ThreadProcessorGroups[ThreadCount] = j;
break;
totalProcessors += (uint32) GetActiveProcessorCountPtr(j);
if (totalProcessors >= ThreadCount)
{
ThreadProcessorGroups[ThreadCount] = j;
break;
}
}
}
}
else
ThreadProcessorGroups[ThreadCount] = 0;
else
ThreadProcessorGroups[ThreadCount] = 0;
#endif
pThreadArg = &ThreadProcessorGroups[ThreadCount];
}
#ifdef DEVICE_DRIVER
if (!NT_SUCCESS(TCStartThread(EncryptionThreadProc, (void*)(&ThreadProcessorGroups[ThreadCount]), &ThreadHandles[ThreadCount])))
if (!NT_SUCCESS(TCStartThread(EncryptionThreadProc, (void*) pThreadArg, &ThreadHandles[ThreadCount])))
#else
if (!(ThreadHandles[ThreadCount] = (HANDLE)_beginthreadex(NULL, 0, EncryptionThreadProc, (void*)(&ThreadProcessorGroups[ThreadCount]), 0, NULL)))
if (!(ThreadHandles[ThreadCount] = (HANDLE)_beginthreadex(NULL, 0, EncryptionThreadProc, (void*) pThreadArg, 0, NULL)))
#endif
{
@ -465,7 +500,7 @@ void EncryptionThreadPoolBeginKeyDerivation (TC_EVENT *completionEvent, TC_EVENT
TC_ACQUIRE_MUTEX (&EnqueueMutex);
workItem = &WorkItemQueue[EnqueuePosition++];
if (EnqueuePosition >= TC_ENC_THREAD_POOL_QUEUE_SIZE)
if (EnqueuePosition >= ThreadQueueSize)
EnqueuePosition = 0;
while (GetWorkItemState (workItem) != WorkItemFree)
@ -564,7 +599,7 @@ void EncryptionThreadPoolDoWork (EncryptionThreadPoolWorkType type, byte *data,
while (fragmentCount-- > 0)
{
workItem = &WorkItemQueue[EnqueuePosition++];
if (EnqueuePosition >= TC_ENC_THREAD_POOL_QUEUE_SIZE)
if (EnqueuePosition >= ThreadQueueSize)
EnqueuePosition = 0;
while (GetWorkItemState (workItem) != WorkItemFree)
@ -606,7 +641,7 @@ size_t GetEncryptionThreadCount ()
size_t GetMaxEncryptionThreadCount ()
{
return TC_ENC_THREAD_POOL_MAX_THREAD_COUNT;
return ThreadPoolCount;
}

View File

@ -27,6 +27,10 @@ typedef enum
DeriveKeyWork
} EncryptionThreadPoolWorkType;
#ifndef DEVICE_DRIVER
size_t GetCpuCount (WORD* pGroupCount);
#endif
void EncryptionThreadPoolBeginKeyDerivation (TC_EVENT *completionEvent, TC_EVENT *noOutstandingWorkItemEvent, LONG *completionFlag, LONG *outstandingWorkItemCount, int pkcs5Prf, char *password, int passwordLength, char *salt, int iterationCount, char *derivedKey);
void EncryptionThreadPoolDoWork (EncryptionThreadPoolWorkType type, byte *data, const UINT64_STRUCT *startUnitNo, uint32 unitCount, PCRYPTO_INFO cryptoInfo);
BOOL EncryptionThreadPoolStart (size_t encryptionFreeCpuCount);

View File

@ -4491,7 +4491,7 @@ NTSTATUS TCCompleteDiskIrp (PIRP irp, NTSTATUS status, ULONG_PTR information)
}
size_t GetCpuCount ()
size_t GetCpuCount (WORD* pGroupCount)
{
size_t cpuCount = 0;
if (KeQueryActiveGroupCountPtr && KeQueryActiveProcessorCountExPtr)
@ -4501,6 +4501,9 @@ size_t GetCpuCount ()
{
cpuCount += (size_t) KeQueryActiveProcessorCountExPtr (i);
}
if (pGroupCount)
*pGroupCount = groupCount;
}
else
{
@ -4514,6 +4517,9 @@ size_t GetCpuCount ()
activeCpuMap >>= 1;
}
if (pGroupCount)
*pGroupCount = 1;
}
if (cpuCount == 0)

View File

@ -173,7 +173,7 @@ NTSTATUS TCCompleteIrp (PIRP irp, NTSTATUS status, ULONG_PTR information);
NTSTATUS TCCompleteDiskIrp (PIRP irp, NTSTATUS status, ULONG_PTR information);
NTSTATUS ProbeRealDriveSize (PDEVICE_OBJECT driveDeviceObject, LARGE_INTEGER *driveSize);
BOOL UserCanAccessDriveDevice ();
size_t GetCpuCount ();
size_t GetCpuCount (WORD* pGroupCount);
USHORT GetCpuGroup (size_t index);
void SetThreadCpuGroupAffinity (USHORT index);
void EnsureNullTerminatedString (wchar_t *str, size_t maxSizeInBytes);