Skip to content

Commit

Permalink
Allow configuration template to disable some SIMD.
Browse files Browse the repository at this point in the history
  • Loading branch information
jslap-ubi committed Nov 29, 2024
1 parent 1128882 commit 7ca7306
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 5 deletions.
6 changes: 4 additions & 2 deletions onnxruntime/core/mlas/lib/convsym.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ struct MLAS_CONV_SYM_DISPATCH {

#if defined(MLAS_TARGET_AMD64)

#if !defined(ORT_DISABLE_AVX2)
const MLAS_CONV_SYM_DISPATCH MlasConvSymDispatchAvx2 = {
MlasConvSymKernelAvx2,
MlasConvSymDepthwiseKernelAvx2,
Expand Down Expand Up @@ -194,8 +195,9 @@ const MLAS_CONV_SYM_DISPATCH MlasConvSymDispatchAvxVnni = {
4, // KernelDepthwiseOutputCount
false, // FixupInputZeroPoint
};
#endif // !defined(ORT_DISABLE_AVX2)

#if !defined(ORT_MINIMAL_BUILD)
#if !defined(ORT_MINIMAL_BUILD) && !defined(ORT_DISABLE_AVX512)

const MLAS_CONV_SYM_DISPATCH MlasConvSymDispatchAvx512Core = {
MlasConvSymKernelAvx512Core,
Expand Down Expand Up @@ -229,7 +231,7 @@ const MLAS_CONV_SYM_DISPATCH MlasConvSymDispatchAvx512Vnni = {
false, // FixupInputZeroPoint
};

#endif // ORT_MINIMAL_BUILD
#endif // !defined(ORT_MINIMAL_BUILD) && !defined(ORT_DISABLE_AVX512)

#elif defined(MLAS_TARGET_ARM64)
const MLAS_CONV_SYM_DISPATCH MlasConvSymU8DispatchNeon = {
Expand Down
13 changes: 10 additions & 3 deletions onnxruntime/core/mlas/lib/platform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ Return Value:
__cpuid(1, Cpuid1[0], Cpuid1[1], Cpuid1[2], Cpuid1[3]);
#endif

#if defined(_MSC_VER)
#if defined(_MSC_VER) && !defined(ORT_DISABLE_SSE4)

//
// Check if the processor supports SSE 4.1 instructions.
Expand Down Expand Up @@ -340,6 +340,7 @@ Return Value:

if ((xcr0 & 0x6) == 0x6) {

#if !defined(ORT_DISABLE_AVX)
this->GemmFloatKernel = MlasGemmFloatKernelAvx;

#if defined(MLAS_TARGET_AMD64)
Expand Down Expand Up @@ -372,6 +373,7 @@ Return Value:
__cpuid_count(7, 0, Cpuid7[0], Cpuid7[1], Cpuid7[2], Cpuid7[3]);
#endif

#if !defined(ORT_DISABLE_AVX2)
if (((Cpuid1[2] & 0x1000) != 0) && ((Cpuid7[1] & 0x20) != 0)) {

this->GemmU8S8Dispatch = &MlasGemmU8S8DispatchAvx2;
Expand Down Expand Up @@ -433,6 +435,7 @@ Return Value:

#if !defined(ORT_MINIMAL_BUILD)

#if !defined(ORT_DISABLE_AVX512)
//
// Check if the processor supports AVX512F features and the
// operating system supports saving AVX512F state.
Expand Down Expand Up @@ -486,6 +489,7 @@ Return Value:
}
}
}
#endif // !defined(ORT_DISABLE_AVX512)

//
// Check if the processor supports AVX-VNNI-INT8
Expand All @@ -498,7 +502,7 @@ Return Value:
this->GemmS8U8Kernel = MlasGemmS8U8KernelAvx2Vnni;
}

#ifndef __APPLE__
#if !defined(__APPLE__) && !defined(ORT_DISABLE_AMX)
#if (defined(_MSC_VER) && (_MSC_VER >= 1933)) || (defined(__GNUC__) && (__GNUC__ >= 13))
//
// Check if the processor supports AVX NE CONVERT.
Expand All @@ -521,13 +525,16 @@ Return Value:
this->GemmU8S8Dispatch = &MlasGemmU8S8DispatchAmx;
}
}
#endif // __APPLE__
#endif // !defined(__APPLE__) && !defined(ORT_DISABLE_AMX)

#endif // !defined(ORT_MINIMAL_BUILD)

}
#endif // !defined(ORT_DISABLE_AVX2)

#endif // MLAS_TARGET_AMD64
#endif // !defined(ORT_DISABLE_AVX)


}
}
Expand Down

0 comments on commit 7ca7306

Please sign in to comment.