From 46f899627ad2350910d1565e190ee126e6aff9b7 Mon Sep 17 00:00:00 2001 From: Jean-Simon Lapointe Date: Fri, 5 Apr 2024 16:19:14 -0400 Subject: [PATCH] Allow configuration template to disable some SIMD. --- onnxruntime/core/mlas/lib/convsym.cpp | 6 ++++-- onnxruntime/core/mlas/lib/platform.cpp | 13 ++++++++++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/onnxruntime/core/mlas/lib/convsym.cpp b/onnxruntime/core/mlas/lib/convsym.cpp index 5f8be3580bb72..eaecf7b2b54ac 100644 --- a/onnxruntime/core/mlas/lib/convsym.cpp +++ b/onnxruntime/core/mlas/lib/convsym.cpp @@ -163,6 +163,7 @@ struct MLAS_CONV_SYM_DISPATCH { #if defined(MLAS_TARGET_AMD64) +#if !defined(ORT_DISABLE_AVX2) const MLAS_CONV_SYM_DISPATCH MlasConvSymDispatchAvx2 = { MlasConvSymKernelAvx2, MlasConvSymDepthwiseKernelAvx2, @@ -194,8 +195,9 @@ const MLAS_CONV_SYM_DISPATCH MlasConvSymDispatchAvxVnni = { 4, // KernelDepthwiseOutputCount false, // FixupInputZeroPoint }; +#endif // !defined(ORT_DISABLE_AVX2) -#if !defined(ORT_MINIMAL_BUILD) +#if !defined(ORT_MINIMAL_BUILD) && !defined(ORT_DISABLE_AVX512) const MLAS_CONV_SYM_DISPATCH MlasConvSymDispatchAvx512Core = { MlasConvSymKernelAvx512Core, @@ -229,7 +231,7 @@ const MLAS_CONV_SYM_DISPATCH MlasConvSymDispatchAvx512Vnni = { false, // FixupInputZeroPoint }; -#endif // ORT_MINIMAL_BUILD +#endif // !defined(ORT_MINIMAL_BUILD) && !defined(ORT_DISABLE_AVX512) #elif defined(MLAS_TARGET_ARM64) const MLAS_CONV_SYM_DISPATCH MlasConvSymU8DispatchNeon = { diff --git a/onnxruntime/core/mlas/lib/platform.cpp b/onnxruntime/core/mlas/lib/platform.cpp index 859b7c2f560a4..0287418ec01b3 100644 --- a/onnxruntime/core/mlas/lib/platform.cpp +++ b/onnxruntime/core/mlas/lib/platform.cpp @@ -298,7 +298,7 @@ Return Value: __cpuid(1, Cpuid1[0], Cpuid1[1], Cpuid1[2], Cpuid1[3]); #endif -#if defined(_MSC_VER) +#if defined(_MSC_VER) && !defined(ORT_DISABLE_SSE4) // // Check if the processor supports SSE 4.1 instructions. @@ -324,6 +324,7 @@ Return Value: if ((xcr0 & 0x6) == 0x6) { +#if !defined(ORT_DISABLE_AVX) this->GemmFloatKernel = MlasGemmFloatKernelAvx; #if defined(MLAS_TARGET_AMD64) @@ -356,6 +357,7 @@ Return Value: __cpuid_count(7, 0, Cpuid7[0], Cpuid7[1], Cpuid7[2], Cpuid7[3]); #endif +#if !defined(ORT_DISABLE_AVX2) if (((Cpuid1[2] & 0x1000) != 0) && ((Cpuid7[1] & 0x20) != 0)) { this->GemmU8S8Dispatch = &MlasGemmU8S8DispatchAvx2; @@ -413,6 +415,7 @@ Return Value: #if !defined(ORT_MINIMAL_BUILD) +#if !defined(ORT_DISABLE_AVX512) // // Check if the processor supports AVX512F features and the // operating system supports saving AVX512F state. @@ -466,8 +469,9 @@ Return Value: } } } +#endif // !defined(ORT_DISABLE_AVX512) -#ifndef __APPLE__ +#if !defined(__APPLE__) && !defined(ORT_DISABLE_AMX) // // Check if the processor supports AMX-TILE and AMX-INT8 // features. @@ -480,13 +484,16 @@ Return Value: this->GemmU8S8Dispatch = &MlasGemmU8S8DispatchAmx; } } -#endif // __APPLE__ +#endif // !defined(__APPLE__) && !defined(ORT_DISABLE_AMX) #endif // ORT_MINIMAL_BUILD } +#endif // !defined(ORT_DISABLE_AVX2) #endif // MLAS_TARGET_AMD64 +#endif // !defined(ORT_DISABLE_AVX) + } }