Skip to content

Commit

Permalink
Enable CPUINFO for all Windows build (#19655)
Browse files Browse the repository at this point in the history
### Description
CPUINFO was disabled in PR #9065 for the following reason:
" api-ms-win-core-kernel32-legacy-*.dll wasn't available in Windows 8
and was added in Windows 10, so cpuinfo breaks our Windows 8 support.
I'm disabling it again."

We no longer support Windows 8, so we can add CPUINFO back.

### Motivation and Context
To make the code simpler. If the library does not work as expected in
some case, we can submit a PR to its code base to fix it.
  • Loading branch information
snnn authored Mar 2, 2024
1 parent f06164e commit a0521f8
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 73 deletions.
9 changes: 1 addition & 8 deletions cmake/external/onnxruntime_external_deps.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -256,14 +256,7 @@ if (onnxruntime_ENABLE_CPUINFO)
set(CPUINFO_SUPPORTED TRUE)
endif()
if (WIN32)
# Exclude Windows ARM build and Windows Store
if (${onnxruntime_target_platform} MATCHES "^(ARM.*|arm.*)$" )
message(WARNING "Cpuinfo not included for compilation problems with Windows ARM.")
set(CPUINFO_SUPPORTED FALSE)
elseif (WIN32 AND NOT CMAKE_CXX_STANDARD_LIBRARIES MATCHES kernel32.lib)
message(WARNING "Cpuinfo not included non-Desktop builds")
set(CPUINFO_SUPPORTED FALSE)
endif()
set(CPUINFO_SUPPORTED TRUE)
elseif (NOT ${onnxruntime_target_platform} MATCHES "^(i[3-6]86|AMD64|x86(_64)?|armv[5-8].*|aarch64|arm64)$")
message(WARNING
"Target processor architecture \"${onnxruntime_target_platform}\" is not supported in cpuinfo. "
Expand Down
5 changes: 0 additions & 5 deletions cmake/onnxruntime_common.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -201,18 +201,13 @@ endif()


# Decide whether cpuinfo is added to onnxruntime_common for the architectures listed below.
if (RISCV64 OR ARM64 OR ARM OR X86 OR X64 OR X86_64)
if((WIN32 AND NOT CMAKE_CXX_STANDARD_LIBRARIES MATCHES kernel32.lib) OR ((ARM64 OR ARM) AND MSVC))
# Intentionally empty branch: cpuinfo is not linked when this condition holds.
# The MSVC compiler reports syntax errors in the cpuinfo ARM source files,
# and cpuinfo does not have code for getting ARM uarch info under Windows.
else()
# Link cpuinfo if supported.
# It is used mainly on ARM with Android.
# Its functionality for detecting x86 CPU features is lacking, and so is its support for Windows.
if (CPUINFO_SUPPORTED)
onnxruntime_add_include_to_target(onnxruntime_common cpuinfo::cpuinfo)
list(APPEND onnxruntime_EXTERNAL_LIBRARIES cpuinfo::cpuinfo ${ONNXRUNTIME_CLOG_TARGET_NAME})
endif()
endif()
endif()

if (NOT onnxruntime_BUILD_SHARED_LIB)
Expand Down
82 changes: 36 additions & 46 deletions onnxruntime/core/common/cpuid_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,13 @@

#if defined(CPUINFO_SUPPORTED)
#include <cpuinfo.h>
#if defined(CPUIDINFO_ARCH_ARM)
namespace onnxruntime {
// Forward declaration only. This function is declared in "core/common/cpuid_uarch.h", but we cannot include the
// whole header file here because some of its symbols conflict with those in <cpuinfo.h>.
void decodeMIDR(uint32_t midr, uint32_t uarch[1]);
} // namespace onnxruntime
#endif
#else
#include "core/common/cpuid_uarch.h"
#endif // CPUINFO_SUPPORTED
Expand Down Expand Up @@ -142,11 +149,6 @@ void CPUIDInfo::ArmLinuxInit() {
// Pytorch CPUINFO only works on ARM linux or android
// Assuming no hyper-threading, no NUMA groups
#ifdef CPUINFO_SUPPORTED
pytorch_cpuinfo_init_ = cpuinfo_initialize();
if (!pytorch_cpuinfo_init_) {
LOGS_DEFAULT(WARNING) << "Failed to init pytorch cpuinfo library, may cause CPU EP performance degradation due to undetected CPU features.";
return;
}
is_hybrid_ = cpuinfo_get_uarchs_count() > 1;
has_arm_neon_dot_ = cpuinfo_has_arm_neon_dot();
has_fp16_ = cpuinfo_has_arm_neon_fp16_arith();
Expand Down Expand Up @@ -239,52 +241,24 @@ void CPUIDInfo::ArmWindowsInit() {
lastUarch = uarch;
}
}

switch (lastUarch) {
case cpuinfo_uarch_cortex_a55:
case cpuinfo_uarch_cortex_a55r0:
case cpuinfo_uarch_cortex_a76:
case cpuinfo_uarch_neoverse_n1:
case cpuinfo_uarch_cortex_a77:
case cpuinfo_uarch_exynos_m4:
case cpuinfo_uarch_exynos_m5:
has_fp16_ = true;
break;
default:
break;
}
if (!has_fp16_) {
/*
* Detecting fp16 support. Different cores should have the same instruction set.
* So we just check the first ID_AA64PFR0_EL1
* Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0100), Op2(0b000),
*/
uint64_t ID_AA64PFR0_EL1;
unsigned long valsize = sizeof(uint64_t);
auto retCode = ::RegGetValueA(
HKEY_LOCAL_MACHINE,
"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
"CP 4020", RRF_RT_REG_QWORD, nullptr,
&ID_AA64PFR0_EL1, &valsize);
if (retCode == ERROR_SUCCESS) {
// AdvSIMD, bits [23:20]
auto advSimd = ID_AA64PFR0_EL1 >> 20;
if ((advSimd & 0xfULL) == 1) {
has_fp16_ = true;
}
}
}
#endif /* Application Family or OneCore Family */

has_arm_neon_dot_ = (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) != 0);
#else
has_arm_neon_dot_ = false;
#endif
has_fp16_ |= has_arm_neon_dot_;
/* TODO: implement them when hw+sw is available for testing these features */
has_arm_neon_i8mm_ = false;
has_arm_sve_i8mm_ = false;
has_arm_neon_bf16_ = false;

if (pytorch_cpuinfo_init_) {
has_fp16_ = cpuinfo_has_arm_neon_fp16_arith();
has_arm_neon_i8mm_ = cpuinfo_has_arm_i8mm();
has_arm_sve_i8mm_ = cpuinfo_has_arm_sve() && cpuinfo_has_arm_i8mm();
has_arm_neon_bf16_ = cpuinfo_has_arm_neon_bf16();
} else {
has_fp16_ = false;
has_arm_neon_i8mm_ = false;
has_arm_sve_i8mm_ = false;
has_arm_neon_bf16_ = false;
}
}

#endif /* (arm or arm64) and windows */
Expand All @@ -304,5 +278,21 @@ uint32_t CPUIDInfo::GetCurrentCoreIdx() const {
return 0xFFFFFFFF; // don't know how to get core index
#endif
}

// Runs the architecture-specific initialization selected at compile time:
// X86Init() on x86, or ArmLinuxInit()/ArmWindowsInit() on ARM depending on the OS.
CPUIDInfo::CPUIDInfo() {
#ifdef CPUIDINFO_ARCH_X86
  X86Init();
#elif defined(CPUIDINFO_ARCH_ARM)
  // Test with defined() so this guard agrees with the one around #include <cpuinfo.h>;
  // a value-based "#if CPUINFO_SUPPORTED" fails to compile when the macro is defined empty.
#if defined(CPUINFO_SUPPORTED)
  // Initialize the cpuinfo library before the platform-specific init routines run.
  // A failure is only logged; the result is recorded in pytorch_cpuinfo_init_.
  pytorch_cpuinfo_init_ = cpuinfo_initialize();
  if (!pytorch_cpuinfo_init_) {
    LOGS_DEFAULT(WARNING) << "Failed to init pytorch cpuinfo library, may cause CPU EP performance degradation due to undetected CPU features.";
  }
#endif  // defined(CPUINFO_SUPPORTED)
#ifdef __linux__
  ArmLinuxInit();
#elif defined(_WIN32)
  ArmWindowsInit();
#endif  // __linux__ / _WIN32
#endif  // CPUIDINFO_ARCH_X86 / CPUIDINFO_ARCH_ARM
}
} // namespace onnxruntime
19 changes: 5 additions & 14 deletions onnxruntime/core/common/cpuid_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,17 +93,7 @@ class CPUIDInfo {
}

private:
// Private constructor: dispatches to the architecture-specific initializer chosen at compile time
// (X86Init() on x86; ArmLinuxInit() or ArmWindowsInit() on ARM, depending on the OS).
CPUIDInfo() {
#ifdef CPUIDINFO_ARCH_X86
X86Init();
#elif defined(CPUIDINFO_ARCH_ARM)
#ifdef __linux__
ArmLinuxInit();
#elif defined(_WIN32)
ArmWindowsInit();
#endif /* __linux__ / _WIN32 (ARM only) */
#endif /* CPUIDINFO_ARCH_X86 / CPUIDINFO_ARCH_ARM */
}
CPUIDInfo();
bool has_amx_bf16_{false};
bool has_avx_{false};
bool has_avx2_{false};
Expand Down Expand Up @@ -131,19 +121,20 @@ class CPUIDInfo {
#ifdef CPUIDINFO_ARCH_X86

void X86Init();

#elif defined(CPUIDINFO_ARCH_ARM)
// Currently the following variable is only used in ARM builds, but later on we may expand its usage.
bool pytorch_cpuinfo_init_{false};
#endif

#ifdef __linux__

bool pytorch_cpuinfo_init_{false};
void ArmLinuxInit();

#elif defined(_WIN32)

void ArmWindowsInit();

#endif /* (arm or arm64) and windows */
#endif
};

} // namespace onnxruntime

0 comments on commit a0521f8

Please sign in to comment.