Only define CPUIDInfo::pytorch_cpuinfo_init_ data member when CPUINFO_SUPPORTED is defined. (#20509)

Only define the CPUIDInfo::pytorch_cpuinfo_init_ data member when CPUINFO_SUPPORTED is defined. When CPUINFO_SUPPORTED is not defined, the member goes unused and can trigger unused-variable warnings in some builds.
edgchen1 authored Apr 30, 2024
1 parent 33e883f commit a7fc0e8
Showing 3 changed files with 77 additions and 56 deletions.
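At its core, the change swaps a [[maybe_unused]] annotation for a preprocessor guard, so builds without the cpuinfo library never define the member at all. A minimal before/after sketch of the pattern (the class is reduced to the one member in question; everything else is illustrative):

// Before: the member is always defined; [[maybe_unused]] asks the compiler
// to suppress unused-member diagnostics, but not every toolchain obliges.
class Before {
  [[maybe_unused]] bool pytorch_cpuinfo_init_{false};
};

// After: the member only exists when the cpuinfo library is compiled in,
// so a non-CPUINFO build has nothing to warn about.
class After {
#if defined(CPUINFO_SUPPORTED)
  bool pytorch_cpuinfo_init_{false};
#endif  // defined(CPUINFO_SUPPORTED)
};

The cost of the guard is that every use of the member must be fenced with the same macro, which is what the .cc changes below do.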
103 changes: 55 additions & 48 deletions onnxruntime/core/common/cpuid_info.cc
@@ -40,7 +40,7 @@

#if _WIN32

#include "Windows.h"
#include <Windows.h>

#define HAS_WINDOWS_DESKTOP WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)

@@ -63,16 +63,17 @@ void decodeMIDR(uint32_t midr, uint32_t uarch[1]);
#include "core/common/cpuid_uarch.h"
#endif // CPUINFO_SUPPORTED

namespace onnxruntime {

#ifdef CPUIDINFO_ARCH_X86

#include <memory>
#if defined(CPUIDINFO_ARCH_X86)
#if defined(_MSC_VER)
#include <intrin.h>
#elif defined(__GNUC__)
#include <cpuid.h>
#endif
#endif // defined(CPUIDINFO_ARCH_X86)

namespace onnxruntime {

#ifdef CPUIDINFO_ARCH_X86

static inline void GetCPUID(int function_id, int data[4]) { // NOLINT
#if defined(_MSC_VER)
@@ -146,46 +147,47 @@ void CPUIDInfo::X86Init() {
#ifdef __linux__

void CPUIDInfo::ArmLinuxInit() {
// PyTorch cpuinfo only works on ARM Linux or Android
// Assuming no hyper-threading, no NUMA groups
#ifdef CPUINFO_SUPPORTED
is_hybrid_ = cpuinfo_get_uarchs_count() > 1;
has_arm_neon_dot_ = cpuinfo_has_arm_neon_dot();
has_fp16_ = cpuinfo_has_arm_neon_fp16_arith();
has_arm_neon_i8mm_ = cpuinfo_has_arm_i8mm();
has_arm_sve_i8mm_ = cpuinfo_has_arm_sve() && cpuinfo_has_arm_i8mm();
has_arm_neon_bf16_ = cpuinfo_has_arm_neon_bf16();

const uint32_t core_cnt = cpuinfo_get_cores_count();
core_uarchs_.resize(core_cnt, cpuinfo_uarch_unknown);
is_armv8_narrow_ld_.resize(core_cnt, false);
for (uint32_t c = 0; c < core_cnt; c++) {
const struct cpuinfo_processor* proc = cpuinfo_get_processor(c);
if (proc == nullptr) {
continue;
}
const struct cpuinfo_core* corep = proc->core;
if (corep == nullptr) {
continue;
}
auto coreid = proc->linux_id;
auto uarch = corep->uarch;
core_uarchs_[coreid] = uarch;
if (uarch == cpuinfo_uarch_cortex_a53 || uarch == cpuinfo_uarch_cortex_a55r0 ||
uarch == cpuinfo_uarch_cortex_a55) {
is_armv8_narrow_ld_[coreid] = true;
#if defined(CPUINFO_SUPPORTED)
if (pytorch_cpuinfo_init_) {
is_hybrid_ = cpuinfo_get_uarchs_count() > 1;
has_arm_neon_dot_ = cpuinfo_has_arm_neon_dot();
has_fp16_ = cpuinfo_has_arm_neon_fp16_arith();
has_arm_neon_i8mm_ = cpuinfo_has_arm_i8mm();
has_arm_sve_i8mm_ = cpuinfo_has_arm_sve() && cpuinfo_has_arm_i8mm();
has_arm_neon_bf16_ = cpuinfo_has_arm_neon_bf16();

const uint32_t core_cnt = cpuinfo_get_cores_count();
core_uarchs_.resize(core_cnt, cpuinfo_uarch_unknown);
is_armv8_narrow_ld_.resize(core_cnt, false);
for (uint32_t c = 0; c < core_cnt; c++) {
const struct cpuinfo_processor* proc = cpuinfo_get_processor(c);
if (proc == nullptr) {
continue;
}
const struct cpuinfo_core* corep = proc->core;
if (corep == nullptr) {
continue;
}
auto coreid = proc->linux_id;
auto uarch = corep->uarch;
core_uarchs_[coreid] = uarch;
if (uarch == cpuinfo_uarch_cortex_a53 || uarch == cpuinfo_uarch_cortex_a55r0 ||
uarch == cpuinfo_uarch_cortex_a55) {
is_armv8_narrow_ld_[coreid] = true;
}
}
}
#else
pytorch_cpuinfo_init_ = false;
has_arm_neon_dot_ = ((getauxval(AT_HWCAP) & HWCAP_ASIMDDP) != 0);
has_fp16_ |= has_arm_neon_dot_;
} else
#endif // defined(CPUINFO_SUPPORTED)
{
has_arm_neon_dot_ = ((getauxval(AT_HWCAP) & HWCAP_ASIMDDP) != 0);
has_fp16_ |= has_arm_neon_dot_;

has_arm_neon_i8mm_ = ((getauxval(AT_HWCAP2) & HWCAP2_I8MM) != 0);
has_arm_sve_i8mm_ = ((getauxval(AT_HWCAP2) & HWCAP2_SVEI8MM) != 0);
has_arm_neon_i8mm_ = ((getauxval(AT_HWCAP2) & HWCAP2_I8MM) != 0);
has_arm_sve_i8mm_ = ((getauxval(AT_HWCAP2) & HWCAP2_SVEI8MM) != 0);

has_arm_neon_bf16_ = ((getauxval(AT_HWCAP2) & HWCAP2_BF16) != 0);
#endif
has_arm_neon_bf16_ = ((getauxval(AT_HWCAP2) & HWCAP2_BF16) != 0);
}
}

#elif defined(_WIN32)
@@ -248,12 +250,15 @@ void CPUIDInfo::ArmWindowsInit() {
has_arm_neon_dot_ = false;
#endif

#if defined(CPUINFO_SUPPORTED)
if (pytorch_cpuinfo_init_) {
has_fp16_ = cpuinfo_has_arm_neon_fp16_arith();
has_arm_neon_i8mm_ = cpuinfo_has_arm_i8mm();
has_arm_sve_i8mm_ = cpuinfo_has_arm_sve() && cpuinfo_has_arm_i8mm();
has_arm_neon_bf16_ = cpuinfo_has_arm_neon_bf16();
} else {
} else
#endif // defined(CPUINFO_SUPPORTED)
{
has_fp16_ = false;
has_arm_neon_i8mm_ = false;
has_arm_sve_i8mm_ = false;
@@ -278,21 +283,23 @@ uint32_t CPUIDInfo::GetCurrentCoreIdx() const {
return 0xFFFFFFFF; // don't know how to get core index
#endif
}

CPUIDInfo::CPUIDInfo() {
#ifdef CPUIDINFO_ARCH_X86
X86Init();
#elif defined(CPUIDINFO_ARCH_ARM)
#if CPUINFO_SUPPORTED
#if defined(CPUINFO_SUPPORTED)
pytorch_cpuinfo_init_ = cpuinfo_initialize();
if (!pytorch_cpuinfo_init_) {
LOGS_DEFAULT(WARNING) << "Failed to init pytorch cpuinfo library, may cause CPU EP performance degradation due to undetected CPU features.";
LOGS_DEFAULT(WARNING) << "Failed to initialize PyTorch cpuinfo library. May cause CPU EP performance degradation "
"due to undetected CPU features.";
}
#endif
#endif // defined(CPUINFO_SUPPORTED)
#ifdef __linux__
ArmLinuxInit();
#elif defined(_WIN32)
ArmWindowsInit();
#endif /* (arm or arm64) and windows */
#endif
#endif /* (arm or arm64) and windows */
#endif // defined(CPUIDINFO_ARCH_ARM)
}
} // namespace onnxruntime
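One detail of the restructured ArmLinuxInit and ArmWindowsInit is worth calling out: the else keyword is emitted just before the #endif, so the brace block that follows does double duty — it is the runtime fallback when cpuinfo initialization fails, and the only path when CPUINFO_SUPPORTED is not defined. A stripped-down sketch of the idiom (the feature probes are hypothetical stand-ins, not the real detection calls):

#include <iostream>

// Hypothetical stand-ins for cpuinfo_initialize(), cpuinfo_has_arm_neon_dot(),
// and the getauxval(AT_HWCAP) fallback check.
[[maybe_unused]] static bool InitCpuinfo() { return true; }
[[maybe_unused]] static bool CpuinfoHasDot() { return true; }
static bool HwcapHasDot() { return false; }

void DetectFeatures() {
#if defined(CPUINFO_SUPPORTED)
  if (InitCpuinfo()) {
    // Preferred path: query the cpuinfo library.
    std::cout << "neon dot: " << CpuinfoHasDot() << "\n";
  } else
#endif  // defined(CPUINFO_SUPPORTED)
  {
    // Fallback: the only path when CPUINFO_SUPPORTED is undefined, and the
    // runtime fallback when cpuinfo initialization fails.
    std::cout << "neon dot: " << HwcapHasDot() << "\n";
  }
}

int main() {
  DetectFeatures();
  return 0;
}

Compiling the sketch with and without -DCPUINFO_SUPPORTED exercises both shapes the preprocessor gives the function.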
13 changes: 9 additions & 4 deletions onnxruntime/core/common/cpuid_info.h
@@ -118,13 +118,16 @@ class CPUIDInfo {
bool has_arm_sve_i8mm_{false};
bool has_arm_neon_bf16_{false};

#ifdef CPUIDINFO_ARCH_X86
#if defined(CPUIDINFO_ARCH_X86)

void X86Init();

#elif defined(CPUIDINFO_ARCH_ARM)
// Now the following var is only used in ARM build, but later one we may expand the usage.
[[maybe_unused]] bool pytorch_cpuinfo_init_{false};
#endif

#if defined(CPUINFO_SUPPORTED)
// Now the following var is only used in ARM build, but later on we may expand the usage.
bool pytorch_cpuinfo_init_{false};
#endif // defined(CPUINFO_SUPPORTED)

#ifdef __linux__

@@ -135,6 +138,8 @@
void ArmWindowsInit();

#endif /* (arm or arm64) and windows */

#endif // defined(CPUIDINFO_ARCH_ARM)
};

} // namespace onnxruntime
17 changes: 13 additions & 4 deletions onnxruntime/test/optimizer/nhwc_transformer_test.cc
@@ -544,7 +544,16 @@ static NodeArg* MakeInitializerARangeFP16(ModelTestBuilder& builder, const std::
return builder.MakeInitializer<MLFloat16>(shape, ARangeOfFP16Values(shape, min, max));
}

TEST(NhwcTransformerTests, ConvFp16) {
class NhwcTransformerTestsFp16 : public ::testing::Test {
protected:
void SetUp() override {
if (!MlasFp16AccelerationSupported()) {
GTEST_SKIP() << "Skipping test because FP16 acceleration support was not detected.";
}
}
};

TEST_F(NhwcTransformerTestsFp16, ConvFp16) {
auto test_case = [&](const std::vector<int64_t>& input_shape, const std::vector<int64_t>& weights_shape) {
auto build_test_case = [&](ModelTestBuilder& builder) {
auto* input_arg = MakeInputARangeFP16(builder, input_shape, MLFloat16(-1.5f), MLFloat16(1.5f));
@@ -572,7 +581,7 @@ TEST(NhwcTransformerTests, ConvFp16) {
test_case({1, 22, 11, 13, 15}, {30, 22, 5, 3, 3});
}

TEST(NhwcTransformerTests, ConvMaxPoolFp16) {
TEST_F(NhwcTransformerTestsFp16, ConvMaxPoolFp16) {
auto test_case = [&](const std::vector<int64_t>& input_shape, const std::vector<int64_t>& weights_shape) {
auto build_test_case = [&](ModelTestBuilder& builder) {
auto* input_arg = MakeInputARangeFP16(builder, input_shape, MLFloat16(-1.5f), MLFloat16(1.5f));
@@ -607,7 +616,7 @@ TEST(NhwcTransformerTests, ConvMaxPoolFp16) {
test_case({1, 15, 11, 13, 15}, {31, 15, 5, 3, 3});
}

TEST(NhwcTransformerTests, ConvGlobalAveragePoolFp16) {
TEST_F(NhwcTransformerTestsFp16, ConvGlobalAveragePoolFp16) {
auto build_test_case = [&](ModelTestBuilder& builder) {
auto* input_arg = MakeInputARangeFP16(builder, {1, 23, 13, 13}, MLFloat16(-1.5f), MLFloat16(1.5f));
auto* conv1_output_arg = builder.MakeIntermediate();
@@ -638,7 +647,7 @@ TEST(NhwcTransformerTests, ConvGlobalAveragePoolFp16) {
TransformerLevel::Level3);
}

TEST(NhwcTransformerTests, ConvAveragePoolFp16) {
TEST_F(NhwcTransformerTestsFp16, ConvAveragePoolFp16) {
auto build_test_case = [&](ModelTestBuilder& builder) {
auto* input_arg = MakeInputARangeFP16(builder, {1, 23, 13, 13}, MLFloat16(-1.5f), MLFloat16(1.5f));
auto* conv1_output_arg = builder.MakeIntermediate();
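The test-side change moves the FP16 tests from TEST to TEST_F on a shared fixture whose SetUp() calls GTEST_SKIP() when MlasFp16AccelerationSupported() returns false; GoogleTest then reports every test in the fixture as skipped rather than failed on hardware without FP16 support. A generic, self-contained sketch of the same idiom (HasFp16Support is a hypothetical stand-in for the real probe):

#include <gtest/gtest.h>

// Hypothetical capability probe standing in for MlasFp16AccelerationSupported().
static bool HasFp16Support() { return false; }

class Fp16Tests : public ::testing::Test {
 protected:
  void SetUp() override {
    if (!HasFp16Support()) {
      // Skipping in SetUp() marks the test as skipped and prevents its body
      // from running at all.
      GTEST_SKIP() << "FP16 acceleration not available on this machine.";
    }
  }
};

// Each TEST_F body only runs when SetUp() did not skip.
TEST_F(Fp16Tests, Example) {
  EXPECT_EQ(1 + 1, 2);
}

Centralizing the check in the fixture avoids repeating the guard at the top of every FP16 test body.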
