Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#19921 [Dup] LLC Core count calculations updated #20171

Merged
merged 5 commits into from
Apr 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions onnxruntime/core/platform/windows/env.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ limitations under the License.
#include "core/common/span_utils.h"
#include "core/platform/env.h"
#include "core/platform/scoped_resource.h"
#if defined(_M_X64) && !defined(_M_ARM64EC) && defined(ONNXRUNTIME_ENABLE_INTEL_METEOR_LAKE_MOBILE_PLATFORM_PERF_PATCH)
#if defined(_M_X64) && !defined(_M_ARM64EC)
#include "core/platform/windows/hardware_core_enumerator.h"
#endif
#include <unsupported/Eigen/CXX11/ThreadPool>
Expand Down Expand Up @@ -252,7 +252,7 @@ void WindowsEnv::SleepForMicroseconds(int64_t micros) const {
}

// EIGEN_NO_CPUID is not defined in any C/C++ source code. It is a compile option.
#if defined(_M_X64) && !defined(_M_ARM64EC) && !defined(EIGEN_NO_CPUID) && defined(ONNXRUNTIME_ENABLE_INTEL_METEOR_LAKE_MOBILE_PLATFORM_PERF_PATCH)
#if defined(_M_X64) && !defined(_M_ARM64EC) && !defined(EIGEN_NO_CPUID)
static constexpr std::array<int, 3> kVendorID_Intel = {0x756e6547, 0x6c65746e, 0x49656e69}; // "GenuntelineI"
#endif
int WindowsEnv::DefaultNumCores() {
Expand All @@ -261,7 +261,7 @@ int WindowsEnv::DefaultNumCores() {

int WindowsEnv::GetNumPhysicalCpuCores() const {
// EIGEN_NO_CPUID is not defined in any C/C++ source code. It is a compile option.
#if defined(_M_X64) && !defined(_M_ARM64EC) && !defined(EIGEN_NO_CPUID) && defined(ONNXRUNTIME_ENABLE_INTEL_METEOR_LAKE_MOBILE_PLATFORM_PERF_PATCH)
#if defined(_M_X64) && !defined(_M_ARM64EC) && !defined(EIGEN_NO_CPUID)
// The following code is a temporary fix for a perf problem on Intel's Meteor Lake CPUs. The Intel compute platform has
// a hybrid architecture in which some CPU cores run significantly slower than the others. If we distribute our compute work
// evenly to all CPU cores, the slowest CPU core will drag the performance down. So, instead, we reduce the total number
Expand Down
13 changes: 7 additions & 6 deletions onnxruntime/core/platform/windows/hardware_core_enumerator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ struct LogicalProcessorInformation {

struct CoreCounter {
uint32_t PhysicalCores = 0;
uint32_t SocDieCores = 0;
uint32_t LLCCores = 0;
};

static LogicalProcessorInformation GetLogicalProcessorInfos(LOGICAL_PROCESSOR_RELATIONSHIP relationship) {
Expand All @@ -42,7 +42,7 @@ uint32_t CountSetBits(DWORD input) {
return c;
}

static CoreCounter GetNumberOPhysicalAndEngineeringCores() {
static CoreCounter GetCoreInfo() {
auto logicalProcessorInformation = GetLogicalProcessorInfos(RelationAll);

CoreCounter cores;
Expand Down Expand Up @@ -73,17 +73,18 @@ static CoreCounter GetNumberOPhysicalAndEngineeringCores() {

read += currentProcessorInfo->Size;
}
// Cores with L2 and LLC cache levels = # Physical Cores - # logical cores without LLC
cores.LLCCores = cores.PhysicalCores - CountSetBits(dwLevel2GroupMask & ~dwLevel3GroupMask);

cores.SocDieCores = CountSetBits(dwLevel2GroupMask & ~dwLevel3GroupMask);
return cores;
}

uint32_t HardwareCoreEnumerator::DefaultIntraOpNumThreads() {
// # of physical cores = # of P cores + # of E Cores + # of Soc Cores.
// # of logical cores = # of P cores x 2 (if hyper threading is enabled) + # of E cores + # of Soc Cores.
auto cores = GetNumberOPhysicalAndEngineeringCores();
// We want to use the number of physical cores, but exclude soc cores
return cores.PhysicalCores - cores.SocDieCores;
auto cores = GetCoreInfo();

return cores.LLCCores;
}

} // namespace onnxruntime
15 changes: 8 additions & 7 deletions winml/lib/Api/HardwareCoreEnumerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

struct CoreCounter {
uint32_t PhysicalCores = 0;
uint32_t Num2CacheCores = 0;
uint32_t LLCCores = 0;
};

static LogicalProcessorInformation GetLogicalProcessorInfos(LOGICAL_PROCESSOR_RELATIONSHIP relationship) {
Expand Down Expand Up @@ -42,7 +42,7 @@
return c;
}

static CoreCounter GetNumberOPhysicalAndEngineeringCores() {
static CoreCounter GetCoreInfo() {
auto logicalProcessorInformation = GetLogicalProcessorInfos(RelationAll);

CoreCounter cores;
Expand All @@ -64,6 +64,7 @@
cores.PhysicalCores++;
break;
case RelationCache:
// Cache level masks count logical processors

Check warning on line 67 in winml/lib/Api/HardwareCoreEnumerator.cpp

View workflow job for this annotation

GitHub Actions / Lint C++

[cpplint] reported by reviewdog 🐶 Should have a space between // and comment [whitespace/comments] [4] Raw Output: winml/lib/Api/HardwareCoreEnumerator.cpp:67: Should have a space between // and comment [whitespace/comments] [4]
if (currentProcessorInfo->Cache.Level == 2) {
dwLevel2GroupMask |= currentProcessorInfo->Cache.GroupMask.Mask;
} else if (currentProcessorInfo->Cache.Level == 3) {
Expand All @@ -75,14 +76,15 @@
read += currentProcessorInfo->Size;
}

cores.Num2CacheCores = CountSetBits(dwLevel2GroupMask & ~dwLevel3GroupMask);
cores.LLCCores = cores.PhysicalCores - CountSetBits(dwLevel2GroupMask & ~dwLevel3GroupMask);

return cores;
}

uint32_t HardwareCoreEnumerator::DefaultIntraOpNumThreads() {
// # of physical cores = # of P cores + # of E Cores + # of Soc Cores.
// # of logical cores = # of P cores x 2 (if hyper threading is enabled) + # of E cores + # of Soc Cores.
auto cores = GetNumberOPhysicalAndEngineeringCores();
auto cores = GetCoreInfo();

#if !defined(_M_ARM64EC) && !defined(_M_ARM64) && !defined(__aarch64__)
const int kVendorID_Intel[3] = {0x756e6547, 0x6c65746e, 0x49656e69}; // "GenuntelineI"
Expand All @@ -97,9 +99,8 @@
auto isHybrid = (regs_leaf7[3] & (1 << 15));

if (isIntel && isHybrid) {
// We want to use the number of physical cores, but exclude soc cores
// On Intel Hybrid processors, numSocCores == cores.Num2CacheCores
return cores.PhysicalCores - cores.Num2CacheCores;
// We want to use the number of physical cores, but exclude cores without an LLC
return cores.LLCCores;
}
#endif

Expand Down
Loading