diff --git a/onnxruntime/test/mlas/bench/bench_computesoftmax.cpp b/onnxruntime/test/mlas/bench/bench_computesoftmax.cpp index f777a7cfc4302..6181be873f73e 100644 --- a/onnxruntime/test/mlas/bench/bench_computesoftmax.cpp +++ b/onnxruntime/test/mlas/bench/bench_computesoftmax.cpp @@ -67,6 +67,8 @@ void COMPUTESOFTMAXINPLACE(benchmark::State& state) { free(ptr.underlying_buffer); } +#if defined(MLAS_TARGET_AMD64) + void REDUCEMAXIMUMF32KERNELAVX(benchmark::State& state) { const auto byte_aligned = narrow(state.range(0)); const auto D = narrow(state.range(1)); @@ -174,6 +176,8 @@ void COMPUTESOFTMAXOUTPUTF32KERNELAVX(benchmark::State& state) { free(ptr.underlying_buffer); } +#endif // defined(MLAS_TARGET_AMD64) + static void ComputeSoftmaxInplaceArgs(benchmark::internal::Benchmark* b) { b->ArgNames({"ByteAligned", "N", "D", "Threads"}); for (int threads : {1, 8}) { @@ -200,6 +204,8 @@ static void ComputeSoftmaxInplaceArgs(benchmark::internal::Benchmark* b) { BENCHMARK(COMPUTESOFTMAXINPLACE)->Apply(ComputeSoftmaxInplaceArgs)->UseRealTime(); +#if defined(MLAS_TARGET_AMD64) + BENCHMARK(REDUCEMAXIMUMF32KERNELAVX) ->ArgNames({"ByteAligned", "D"}) ->ArgsProduct({ @@ -231,3 +237,5 @@ BENCHMARK(COMPUTESOFTMAXOUTPUTF32KERNELAVX) {3, 4, 5, 7, 9, 11, 13, 15, 16, 500, 2000}, // D }) ->UseRealTime(); + +#endif // defined(MLAS_TARGET_AMD64)