Only define CPUIDInfo::pytorch_cpuinfo_init_ data member when CPUINFO_SUPPORTED is defined. (#20509)

Only define the CPUIDInfo::pytorch_cpuinfo_init_ data member when CPUINFO_SUPPORTED is defined. When CPUINFO_SUPPORTED is not defined, the member goes unused and can trigger unused-variable warnings in some builds.
edgchen1 authored Apr 30, 2024
1 parent 33e883f commit a7fc0e8
Showing 3 changed files with 77 additions and 56 deletions.
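At its core, the change swaps a [[maybe_unused]] annotation for a preprocessor guard, so builds without the cpuinfo library never define the member at all. A minimal before/after sketch of the pattern (the class is reduced to the one member in question; everything else is illustrative):

// Before: the member is always defined; [[maybe_unused]] asks the compiler
// to suppress unused-member diagnostics, but not every toolchain obliges.
class Before {
  [[maybe_unused]] bool pytorch_cpuinfo_init_{false};
};

// After: the member only exists when the cpuinfo library is compiled in,
// so a non-CPUINFO build has nothing to warn about.
class After {
#if defined(CPUINFO_SUPPORTED)
  bool pytorch_cpuinfo_init_{false};
#endif  // defined(CPUINFO_SUPPORTED)
};

The cost of the guard is that every use of the member must be fenced with the same macro, which is what the .cc changes below do.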
103 changes: 55 additions & 48 deletions onnxruntime/core/common/cpuid_info.cc
@@ -40,7 +40,7 @@

#if _WIN32

#include "Windows.h"
#include <Windows.h>

#define HAS_WINDOWS_DESKTOP WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)

@@ -63,16 +63,17 @@ void decodeMIDR(uint32_t midr, uint32_t uarch[1]);
#include "core/common/cpuid_uarch.h"
#endif // CPUINFO_SUPPORTED

namespace onnxruntime {

#ifdef CPUIDINFO_ARCH_X86

#include <memory>
#if defined(CPUIDINFO_ARCH_X86)
#if defined(_MSC_VER)
#include <intrin.h>
#elif defined(__GNUC__)
#include <cpuid.h>
#endif
#endif // defined(CPUIDINFO_ARCH_X86)

namespace onnxruntime {

#ifdef CPUIDINFO_ARCH_X86

static inline void GetCPUID(int function_id, int data[4]) { // NOLINT
#if defined(_MSC_VER)
@@ -146,46 +147,47 @@ void CPUIDInfo::X86Init() {
#ifdef __linux__

void CPUIDInfo::ArmLinuxInit() {
// PyTorch cpuinfo only works on ARM Linux or Android
// Assuming no hyper-threading, no NUMA groups
#ifdef CPUINFO_SUPPORTED
is_hybrid_ = cpuinfo_get_uarchs_count() > 1;
has_arm_neon_dot_ = cpuinfo_has_arm_neon_dot();
has_fp16_ = cpuinfo_has_arm_neon_fp16_arith();
has_arm_neon_i8mm_ = cpuinfo_has_arm_i8mm();
has_arm_sve_i8mm_ = cpuinfo_has_arm_sve() && cpuinfo_has_arm_i8mm();
has_arm_neon_bf16_ = cpuinfo_has_arm_neon_bf16();

const uint32_t core_cnt = cpuinfo_get_cores_count();
core_uarchs_.resize(core_cnt, cpuinfo_uarch_unknown);
is_armv8_narrow_ld_.resize(core_cnt, false);
for (uint32_t c = 0; c < core_cnt; c++) {
const struct cpuinfo_processor* proc = cpuinfo_get_processor(c);
if (proc == nullptr) {
continue;
}
const struct cpuinfo_core* corep = proc->core;
if (corep == nullptr) {
continue;
}
auto coreid = proc->linux_id;
auto uarch = corep->uarch;
core_uarchs_[coreid] = uarch;
if (uarch == cpuinfo_uarch_cortex_a53 || uarch == cpuinfo_uarch_cortex_a55r0 ||
uarch == cpuinfo_uarch_cortex_a55) {
is_armv8_narrow_ld_[coreid] = true;
#if defined(CPUINFO_SUPPORTED)
if (pytorch_cpuinfo_init_) {
is_hybrid_ = cpuinfo_get_uarchs_count() > 1;
has_arm_neon_dot_ = cpuinfo_has_arm_neon_dot();
has_fp16_ = cpuinfo_has_arm_neon_fp16_arith();
has_arm_neon_i8mm_ = cpuinfo_has_arm_i8mm();
has_arm_sve_i8mm_ = cpuinfo_has_arm_sve() && cpuinfo_has_arm_i8mm();
has_arm_neon_bf16_ = cpuinfo_has_arm_neon_bf16();

const uint32_t core_cnt = cpuinfo_get_cores_count();
core_uarchs_.resize(core_cnt, cpuinfo_uarch_unknown);
is_armv8_narrow_ld_.resize(core_cnt, false);
for (uint32_t c = 0; c < core_cnt; c++) {
const struct cpuinfo_processor* proc = cpuinfo_get_processor(c);
if (proc == nullptr) {
continue;
}
const struct cpuinfo_core* corep = proc->core;
if (corep == nullptr) {
continue;
}
auto coreid = proc->linux_id;
auto uarch = corep->uarch;
core_uarchs_[coreid] = uarch;
if (uarch == cpuinfo_uarch_cortex_a53 || uarch == cpuinfo_uarch_cortex_a55r0 ||
uarch == cpuinfo_uarch_cortex_a55) {
is_armv8_narrow_ld_[coreid] = true;
}
}
}
#else
pytorch_cpuinfo_init_ = false;
has_arm_neon_dot_ = ((getauxval(AT_HWCAP) & HWCAP_ASIMDDP) != 0);
has_fp16_ |= has_arm_neon_dot_;
} else
#endif // defined(CPUINFO_SUPPORTED)
{
has_arm_neon_dot_ = ((getauxval(AT_HWCAP) & HWCAP_ASIMDDP) != 0);
has_fp16_ |= has_arm_neon_dot_;

has_arm_neon_i8mm_ = ((getauxval(AT_HWCAP2) & HWCAP2_I8MM) != 0);
has_arm_sve_i8mm_ = ((getauxval(AT_HWCAP2) & HWCAP2_SVEI8MM) != 0);
has_arm_neon_i8mm_ = ((getauxval(AT_HWCAP2) & HWCAP2_I8MM) != 0);
has_arm_sve_i8mm_ = ((getauxval(AT_HWCAP2) & HWCAP2_SVEI8MM) != 0);

has_arm_neon_bf16_ = ((getauxval(AT_HWCAP2) & HWCAP2_BF16) != 0);
#endif
has_arm_neon_bf16_ = ((getauxval(AT_HWCAP2) & HWCAP2_BF16) != 0);
}
}

#elif defined(_WIN32)
@@ -248,12 +250,15 @@ void CPUIDInfo::ArmWindowsInit() {
has_arm_neon_dot_ = false;
#endif

#if defined(CPUINFO_SUPPORTED)
if (pytorch_cpuinfo_init_) {
has_fp16_ = cpuinfo_has_arm_neon_fp16_arith();
has_arm_neon_i8mm_ = cpuinfo_has_arm_i8mm();
has_arm_sve_i8mm_ = cpuinfo_has_arm_sve() && cpuinfo_has_arm_i8mm();
has_arm_neon_bf16_ = cpuinfo_has_arm_neon_bf16();
} else {
} else
#endif // defined(CPUINFO_SUPPORTED)
{
has_fp16_ = false;
has_arm_neon_i8mm_ = false;
has_arm_sve_i8mm_ = false;
@@ -278,21 +283,23 @@ uint32_t CPUIDInfo::GetCurrentCoreIdx() const {
return 0xFFFFFFFF; // don't know how to get core index
#endif
}

CPUIDInfo::CPUIDInfo() {
#ifdef CPUIDINFO_ARCH_X86
X86Init();
#elif defined(CPUIDINFO_ARCH_ARM)
#if CPUINFO_SUPPORTED
#if defined(CPUINFO_SUPPORTED)
pytorch_cpuinfo_init_ = cpuinfo_initialize();
if (!pytorch_cpuinfo_init_) {
LOGS_DEFAULT(WARNING) << "Failed to init pytorch cpuinfo library, may cause CPU EP performance degradation due to undetected CPU features.";
LOGS_DEFAULT(WARNING) << "Failed to initialize PyTorch cpuinfo library. May cause CPU EP performance degradation "
"due to undetected CPU features.";
}
#endif
#endif // defined(CPUINFO_SUPPORTED)
#ifdef __linux__
ArmLinuxInit();
#elif defined(_WIN32)
ArmWindowsInit();
#endif /* (arm or arm64) and windows */
#endif
#endif /* (arm or arm64) and windows */
#endif // defined(CPUIDINFO_ARCH_ARM)
}
} // namespace onnxruntime
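One detail of the restructured ArmLinuxInit and ArmWindowsInit is worth calling out: the else keyword is emitted just before the #endif, so the brace block that follows does double duty — it is the runtime fallback when cpuinfo initialization fails, and the only path when CPUINFO_SUPPORTED is not defined. A stripped-down sketch of the idiom (the feature probes are hypothetical stand-ins, not the real detection calls):

#include <iostream>

// Hypothetical stand-ins for cpuinfo_initialize(), cpuinfo_has_arm_neon_dot(),
// and the getauxval(AT_HWCAP) fallback check.
[[maybe_unused]] static bool InitCpuinfo() { return true; }
[[maybe_unused]] static bool CpuinfoHasDot() { return true; }
static bool HwcapHasDot() { return false; }

void DetectFeatures() {
#if defined(CPUINFO_SUPPORTED)
  if (InitCpuinfo()) {
    // Preferred path: query the cpuinfo library.
    std::cout << "neon dot: " << CpuinfoHasDot() << "\n";
  } else
#endif  // defined(CPUINFO_SUPPORTED)
  {
    // Fallback: the only path when CPUINFO_SUPPORTED is undefined, and the
    // runtime fallback when cpuinfo initialization fails.
    std::cout << "neon dot: " << HwcapHasDot() << "\n";
  }
}

int main() {
  DetectFeatures();
  return 0;
}

Compiling the sketch with and without -DCPUINFO_SUPPORTED exercises both shapes the preprocessor gives the function.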
13 changes: 9 additions & 4 deletions onnxruntime/core/common/cpuid_info.h
@@ -118,13 +118,16 @@ class CPUIDInfo {
bool has_arm_sve_i8mm_{false};
bool has_arm_neon_bf16_{false};

#ifdef CPUIDINFO_ARCH_X86
#if defined(CPUIDINFO_ARCH_X86)

void X86Init();

#elif defined(CPUIDINFO_ARCH_ARM)
// Now the following var is only used in ARM build, but later one we may expand the usage.
[[maybe_unused]] bool pytorch_cpuinfo_init_{false};
#endif

#if defined(CPUINFO_SUPPORTED)
// Now the following var is only used in ARM build, but later on we may expand the usage.
bool pytorch_cpuinfo_init_{false};
#endif // defined(CPUINFO_SUPPORTED)

#ifdef __linux__

@@ -135,6 +138,8 @@
void ArmWindowsInit();

#endif /* (arm or arm64) and windows */

#endif // defined(CPUIDINFO_ARCH_ARM)
};

} // namespace onnxruntime
17 changes: 13 additions & 4 deletions onnxruntime/test/optimizer/nhwc_transformer_test.cc
@@ -544,7 +544,16 @@ static NodeArg* MakeInitializerARangeFP16(ModelTestBuilder& builder, const std::
return builder.MakeInitializer<MLFloat16>(shape, ARangeOfFP16Values(shape, min, max));
}

TEST(NhwcTransformerTests, ConvFp16) {
class NhwcTransformerTestsFp16 : public ::testing::Test {
protected:
void SetUp() override {
if (!MlasFp16AccelerationSupported()) {
GTEST_SKIP() << "Skipping test because FP16 acceleration support was not detected.";
}
}
};

TEST_F(NhwcTransformerTestsFp16, ConvFp16) {
auto test_case = [&](const std::vector<int64_t>& input_shape, const std::vector<int64_t>& weights_shape) {
auto build_test_case = [&](ModelTestBuilder& builder) {
auto* input_arg = MakeInputARangeFP16(builder, input_shape, MLFloat16(-1.5f), MLFloat16(1.5f));
@@ -572,7 +581,7 @@ TEST(NhwcTransformerTests, ConvFp16) {
test_case({1, 22, 11, 13, 15}, {30, 22, 5, 3, 3});
}

TEST(NhwcTransformerTests, ConvMaxPoolFp16) {
TEST_F(NhwcTransformerTestsFp16, ConvMaxPoolFp16) {
auto test_case = [&](const std::vector<int64_t>& input_shape, const std::vector<int64_t>& weights_shape) {
auto build_test_case = [&](ModelTestBuilder& builder) {
auto* input_arg = MakeInputARangeFP16(builder, input_shape, MLFloat16(-1.5f), MLFloat16(1.5f));
@@ -607,7 +616,7 @@ TEST(NhwcTransformerTests, ConvMaxPoolFp16) {
test_case({1, 15, 11, 13, 15}, {31, 15, 5, 3, 3});
}

TEST(NhwcTransformerTests, ConvGlobalAveragePoolFp16) {
TEST_F(NhwcTransformerTestsFp16, ConvGlobalAveragePoolFp16) {
auto build_test_case = [&](ModelTestBuilder& builder) {
auto* input_arg = MakeInputARangeFP16(builder, {1, 23, 13, 13}, MLFloat16(-1.5f), MLFloat16(1.5f));
auto* conv1_output_arg = builder.MakeIntermediate();
@@ -638,7 +647,7 @@ TEST(NhwcTransformerTests, ConvGlobalAveragePoolFp16) {
TransformerLevel::Level3);
}

TEST(NhwcTransformerTests, ConvAveragePoolFp16) {
TEST_F(NhwcTransformerTestsFp16, ConvAveragePoolFp16) {
auto build_test_case = [&](ModelTestBuilder& builder) {
auto* input_arg = MakeInputARangeFP16(builder, {1, 23, 13, 13}, MLFloat16(-1.5f), MLFloat16(1.5f));
auto* conv1_output_arg = builder.MakeIntermediate();
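The test-side change moves the FP16 tests from TEST to TEST_F on a shared fixture whose SetUp() calls GTEST_SKIP() when MlasFp16AccelerationSupported() returns false; GoogleTest then reports every test in the fixture as skipped rather than failed on hardware without FP16 support. A generic, self-contained sketch of the same idiom (HasFp16Support is a hypothetical stand-in for the real probe):

#include <gtest/gtest.h>

// Hypothetical capability probe standing in for MlasFp16AccelerationSupported().
static bool HasFp16Support() { return false; }

class Fp16Tests : public ::testing::Test {
 protected:
  void SetUp() override {
    if (!HasFp16Support()) {
      // Skipping in SetUp() marks the test as skipped and prevents its body
      // from running at all.
      GTEST_SKIP() << "FP16 acceleration not available on this machine.";
    }
  }
};

// Each TEST_F body only runs when SetUp() did not skip.
TEST_F(Fp16Tests, Example) {
  EXPECT_EQ(1 + 1, 2);
}

Centralizing the check in the fixture avoids repeating the guard at the top of every FP16 test body.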
