diff --git a/winml/lib/Api/HardwareCoreEnumerator.cpp b/winml/lib/Api/HardwareCoreEnumerator.cpp index df751f56187e0..eb4fbc7aa63b5 100644 --- a/winml/lib/Api/HardwareCoreEnumerator.cpp +++ b/winml/lib/Api/HardwareCoreEnumerator.cpp @@ -34,39 +34,12 @@ static LogicalProcessorInformation GetLogicalProcessorInfos(LOGICAL_PROCESSOR_RE return {std::move(processorInformationBytes), length}; } -static long long GetNumberOfSoCDieCores() { - DWORD dwLevel2GroupMask = 0; - DWORD dwLevel3GroupMask = 0; - DWORD dwSoCGroupMask = 0; - - auto logicalProcessorInformation = GetLogicalProcessorInfos(RelationAll); - auto processorInformation = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)logicalProcessorInformation.Buffer.get(); - - size_t read = 0; - while (read <= logicalProcessorInformation.Length) { - switch (processorInformation->Relationship) { - case RelationCache: - if (processorInformation->Cache.Level == 2) { - dwLevel2GroupMask |= processorInformation->Cache.GroupMask.Mask; - } else if (processorInformation->Cache.Level == 3) { - dwLevel3GroupMask |= processorInformation->Cache.GroupMask.Mask; - } - break; - } - - read += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX); - processorInformation++; - } - - dwSoCGroupMask = (dwLevel2GroupMask & ~dwLevel3GroupMask); - - return __popcnt(dwSoCGroupMask); -} - static CoreCounter GetNumberOPhysicalAndEngineeringCores() { - auto logicalProcessorInformation = GetLogicalProcessorInfos(RelationProcessorCore); + auto logicalProcessorInformation = GetLogicalProcessorInfos(RelationAll); CoreCounter cores; + DWORD dwLevel2GroupMask = 0; + DWORD dwLevel3GroupMask = 0; size_t read = 0; PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX currentProcessorInfo = NULL; @@ -80,12 +53,19 @@ static CoreCounter GetNumberOPhysicalAndEngineeringCores() { case RelationProcessorCore: cores.PhysicalCores++; break; + case RelationCache: + if (currentProcessorInfo->Cache.Level == 2) { + dwLevel2GroupMask |= currentProcessorInfo->Cache.GroupMask.Mask; + } else if 
(currentProcessorInfo->Cache.Level == 3) { + dwLevel3GroupMask |= currentProcessorInfo->Cache.GroupMask.Mask; + } + break; } read += currentProcessorInfo->Size; } - cores.SocDieCores = GetNumberOfSoCDieCores(); + cores.SocDieCores = __popcnt(dwLevel2GroupMask & ~dwLevel3GroupMask); return cores; } @@ -93,6 +73,7 @@ uint32_t HardwareCoreEnumerator::DefaultIntraOpNumThreads() { // # of physical cores = # of P cores + # of E Cores + # of Soc Cores. // # of logical cores = # of P cores x 2 (if hyper threading is enabled) + # of E cores + # of Soc Cores. auto cores = GetNumberOPhysicalAndEngineeringCores(); + // We want to use the number of physical cores, but exclude soc cores return static_cast(cores.PhysicalCores - cores.SocDieCores); } diff --git a/winml/lib/Api/LearningModelDevice.cpp b/winml/lib/Api/LearningModelDevice.cpp index 70c5faff44f5b..9f48ee03886e1 100644 --- a/winml/lib/Api/LearningModelDevice.cpp +++ b/winml/lib/Api/LearningModelDevice.cpp @@ -130,7 +130,6 @@ LearningModelDevice::CacheThreadPool(_winml::IThreading* thread_pool) { return S_OK; } - uint32_t LearningModelDevice::NumberOfIntraOpThreads() { if (IsCpuDevice()) { return HardwareCoreEnumerator::DefaultIntraOpNumThreads(); diff --git a/winml/lib/Api/LearningModelSessionOptions.cpp b/winml/lib/Api/LearningModelSessionOptions.cpp index 3a0c861f6e7f7..f85a866e34f3d 100644 --- a/winml/lib/Api/LearningModelSessionOptions.cpp +++ b/winml/lib/Api/LearningModelSessionOptions.cpp @@ -12,7 +12,6 @@ LearningModelSessionOptions::LearningModelSessionOptions() intra_op_num_threads_override_ = HardwareCoreEnumerator::DefaultIntraOpNumThreads(); } - LearningModelSessionOptions::LearningModelSessionOptions(const LearningModelSessionOptions& options) : batch_size_override_(options.batch_size_override_), close_model_on_session_creation_(options.close_model_on_session_creation_),