Skip to content

Commit

Permalink
Allow configuration template to disable some SIMD.
Browse files Browse the repository at this point in the history
  • Loading branch information
jslap-ubi committed Sep 23, 2024
1 parent 9b37b3e commit d0aada7
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 6 deletions.
6 changes: 4 additions & 2 deletions onnxruntime/core/mlas/lib/convsym.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ struct MLAS_CONV_SYM_DISPATCH {

#if defined(MLAS_TARGET_AMD64)

#if !defined(ORT_DISABLE_AVX2)
const MLAS_CONV_SYM_DISPATCH MlasConvSymDispatchAvx2 = {
MlasConvSymKernelAvx2,
MlasConvSymDepthwiseKernelAvx2,
Expand Down Expand Up @@ -194,8 +195,9 @@ const MLAS_CONV_SYM_DISPATCH MlasConvSymDispatchAvxVnni = {
4, // KernelDepthwiseOutputCount
false, // FixupInputZeroPoint
};
#endif // !defined(ORT_DISABLE_AVX2)

#if !defined(ORT_MINIMAL_BUILD)
#if !defined(ORT_MINIMAL_BUILD) && !defined(ORT_DISABLE_AVX512)

const MLAS_CONV_SYM_DISPATCH MlasConvSymDispatchAvx512Core = {
MlasConvSymKernelAvx512Core,
Expand Down Expand Up @@ -229,7 +231,7 @@ const MLAS_CONV_SYM_DISPATCH MlasConvSymDispatchAvx512Vnni = {
false, // FixupInputZeroPoint
};

#endif // ORT_MINIMAL_BUILD
#endif // !defined(ORT_MINIMAL_BUILD) && !defined(ORT_DISABLE_AVX512)

#elif defined(MLAS_TARGET_ARM64)
const MLAS_CONV_SYM_DISPATCH MlasConvSymU8DispatchNeon = {
Expand Down
15 changes: 11 additions & 4 deletions onnxruntime/core/mlas/lib/platform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ Module Name:
#include <thread>
#include <mutex>

#if defined(MLAS_TARGET_POWER)
#if defined(MLAS_TARGET_POWER)
#if defined(__linux__)
#include <sys/auxv.h>
#elif defined(_AIX)
Expand Down Expand Up @@ -303,7 +303,7 @@ Return Value:
__cpuid(1, Cpuid1[0], Cpuid1[1], Cpuid1[2], Cpuid1[3]);
#endif

#if defined(_MSC_VER)
#if defined(_MSC_VER) && !defined(ORT_DISABLE_SSE4)

//
// Check if the processor supports SSE 4.1 instructions.
Expand All @@ -329,6 +329,7 @@ Return Value:

if ((xcr0 & 0x6) == 0x6) {

#if !defined(ORT_DISABLE_AVX)
this->GemmFloatKernel = MlasGemmFloatKernelAvx;

#if defined(MLAS_TARGET_AMD64)
Expand Down Expand Up @@ -361,6 +362,7 @@ Return Value:
__cpuid_count(7, 0, Cpuid7[0], Cpuid7[1], Cpuid7[2], Cpuid7[3]);
#endif

#if !defined(ORT_DISABLE_AVX2)
if (((Cpuid1[2] & 0x1000) != 0) && ((Cpuid7[1] & 0x20) != 0)) {

this->GemmU8S8Dispatch = &MlasGemmU8S8DispatchAvx2;
Expand Down Expand Up @@ -422,6 +424,7 @@ Return Value:

#if !defined(ORT_MINIMAL_BUILD)

#if !defined(ORT_DISABLE_AVX512)
//
// Check if the processor supports AVX512F features and the
// operating system supports saving AVX512F state.
Expand Down Expand Up @@ -475,6 +478,7 @@ Return Value:
}
}
}
#endif // !defined(ORT_DISABLE_AVX512)

//
// Check if the processor supports AVX-VNNI-INT8
Expand All @@ -487,7 +491,7 @@ Return Value:
this->GemmS8U8Kernel = MlasGemmS8U8KernelAvx2Vnni;
}

#ifndef __APPLE__
#if !defined(__APPLE__) && !defined(ORT_DISABLE_AMX)
#if (defined(_MSC_VER) && (_MSC_VER >= 1933)) || (defined(__GNUC__) && (__GNUC__ >= 13))
//
// Check if the processor supports AVX NE CONVERT.
Expand All @@ -510,13 +514,16 @@ Return Value:
this->GemmU8S8Dispatch = &MlasGemmU8S8DispatchAmx;
}
}
#endif // __APPLE__
#endif // !defined(__APPLE__) && !defined(ORT_DISABLE_AMX)

#endif // ORT_MINIMAL_BUILD

}
#endif // !defined(ORT_DISABLE_AVX2)

#endif // MLAS_TARGET_AMD64
#endif // !defined(ORT_DISABLE_AVX)


}
}
Expand Down

0 comments on commit d0aada7

Please sign in to comment.