From a16da8aff8c1e8044764766eeccd2ed23124391b Mon Sep 17 00:00:00 2001 From: Tianlei Wu Date: Mon, 18 Mar 2024 06:36:01 +0000 Subject: [PATCH] adjust default test tolerance; disable tf32; fix A100 failed tests --- .../test/contrib_ops/attention_op_test.cc | 20 +--- .../test/contrib_ops/beam_search_test.cc | 20 +++- .../contrib_ops/decoder_attention_op_test.cc | 7 +- ...oder_masked_multihead_attention_op_test.cc | 6 +- onnxruntime/test/contrib_ops/fft_op_test.cc | 2 + .../test/contrib_ops/greedy_search_test.cc | 14 ++- .../test/contrib_ops/gridsample_test.cc | 1 + .../test/contrib_ops/layer_norm_op_test.cc | 6 + onnxruntime/test/contrib_ops/moe_test.cc | 2 + .../contrib_ops/packed_attention_op_test.cc | 3 +- .../packed_multihead_attention_op_test.cc | 2 + .../contrib_ops/quantize_attention_op_test.cc | 2 + onnxruntime/test/contrib_ops/sampling_test.cc | 9 +- onnxruntime/test/onnx/main.cc | 14 ++- onnxruntime/test/providers/base_tester.cc | 9 ++ onnxruntime/test/providers/base_tester.h | 3 + onnxruntime/test/providers/checkers.cc | 112 ++++++++++-------- .../cpu/activation/activation_op_test.h | 5 + .../test/providers/cpu/math/einsum_test.cc | 79 ++++++------ .../cpu/math/element_wise_ops_test.cc | 15 ++- .../providers/cpu/math/logsoftmax_test.cc | 10 +- onnxruntime/test/providers/cpu/model_tests.cc | 21 +--- .../providers/cpu/nn/batch_norm_op_test.cc | 9 +- .../test/providers/cpu/nn/pool_op_test.cc | 1 + .../cpu/object_detection/roialign_test.cc | 7 +- .../cpu/rnn/deep_cpu_lstm_op_test.cc | 2 + .../providers/cpu/tensor/affine_grid_test.cc | 17 +++ .../mean_variance_normalization_test.cc | 5 + .../providers/cpu/tensor/onehot_op_test.cc | 17 ++- .../providers/cpu/tensor/resize_op_test.cc | 75 +++++++----- .../providers/cpu/tensor/upsample_op_test.cc | 4 +- onnxruntime/test/util/default_providers.cc | 6 +- .../test/gradient/optimizer_ops_test.cc | 15 +++ .../cpu/nn/batchnorm_internal_test.cc | 2 + 34 files changed, 340 insertions(+), 182 deletions(-) diff --git a/onnxruntime/test/contrib_ops/attention_op_test.cc b/onnxruntime/test/contrib_ops/attention_op_test.cc index b652e0723f5aa..5ed68e1b48133 100644 --- a/onnxruntime/test/contrib_ops/attention_op_test.cc +++ b/onnxruntime/test/contrib_ops/attention_op_test.cc @@ -227,6 +227,12 @@ static void RunAttentionTest( tester.AddOptionalInputEdge(); } + if (use_float16) { + tester.SetOutputTolerance(0.005f, 0.005f); + } else { + tester.SetOutputTolerance(0.001f, 0.001f); + } + if (enable_cuda) { std::vector> execution_providers; execution_providers.push_back(DefaultCudaExecutionProvider()); @@ -2013,13 +2019,6 @@ TEST(AttentionTest, AttentionMaskIndexOutOfRange) { #if !defined(__wasm__) // TODO: fix in web assembly TEST(AttentionTest, AttentionPastState_dynamic) { - // ORT enables TF32 in GEMM for A100. TF32 will cause precsion loss and fail this test. - // Do not run this test unless TF32 is disabled explicitly. - if (HasCudaEnvironment(800) && ParseEnvironmentVariableWithDefault("NVIDIA_TF32_OVERRIDE", 1) != 0) { - GTEST_SKIP() << "Skipping AttentionPastState_dynamic in A100 since TF32 is enabled"; - return; - } - // create rand inputs RandomValueGenerator random{}; @@ -2101,13 +2100,6 @@ static void RunModelWithRandomInput( std::vector& mask_index_data, std::string& onnx_model, bool is_float16) { - // ORT enables TF32 in GEMM for A100. TF32 will cause precsion loss and fail this test. - // Do not run this test unless TF32 is disabled explicitly. - if (HasCudaEnvironment(800) && ParseEnvironmentVariableWithDefault("NVIDIA_TF32_OVERRIDE", 1) != 0) { - GTEST_SKIP() << "Skipping RunModelWithRandomInput in A100 since TF32 is enabled"; - return; - } - RandomValueGenerator random{234}; constexpr int hidden_size = 768; diff --git a/onnxruntime/test/contrib_ops/beam_search_test.cc b/onnxruntime/test/contrib_ops/beam_search_test.cc index 156ed3799fc22..6ce9f5de68f11 100644 --- a/onnxruntime/test/contrib_ops/beam_search_test.cc +++ b/onnxruntime/test/contrib_ops/beam_search_test.cc @@ -8,6 +8,10 @@ #include "core/session/onnxruntime_cxx_api.h" #include "test/common/cuda_op_test_utils.h" +#ifdef USE_CUDA +#include "core/providers/cuda/cuda_provider_options.h" +#endif + extern std::unique_ptr ort_env; namespace onnxruntime { @@ -70,7 +74,9 @@ TEST(BeamSearchTest, GptBeamSearchFp32) { Ort::SessionOptions session_options; #ifdef USE_CUDA - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0)); + OrtCUDAProviderOptionsV2 cuda_options; + cuda_options.use_tf32 = false; + session_options.AppendExecutionProvider_CUDA_V2(cuda_options); #endif #ifdef USE_ROCM @@ -161,7 +167,9 @@ TEST(BeamSearchTest, GptBeamSearchFp16) { if (enable_cuda || enable_rocm) { Ort::SessionOptions session_options; #ifdef USE_CUDA - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0)); + OrtCUDAProviderOptionsV2 cuda_options; + cuda_options.use_tf32 = false; + session_options.AppendExecutionProvider_CUDA_V2(cuda_options); #endif #ifdef USE_ROCM @@ -254,7 +262,9 @@ TEST(BeamSearchTest, GptBeamSearchWithInitDecoderFp16) { if (enable_cuda || enable_rocm) { Ort::SessionOptions session_options; #ifdef USE_CUDA - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0)); + OrtCUDAProviderOptionsV2 cuda_options; + cuda_options.use_tf32 = false; + session_options.AppendExecutionProvider_CUDA_V2(cuda_options); #endif #ifdef USE_ROCM @@ -346,7 +356,9 @@ TEST(BeamSearchTest, GptBeamSearchFp16_VocabPadded) { if (enable_cuda || enable_rocm) { Ort::SessionOptions session_options; #ifdef USE_CUDA - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0)); + OrtCUDAProviderOptionsV2 cuda_options; + cuda_options.use_tf32 = false; + session_options.AppendExecutionProvider_CUDA_V2(cuda_options); #endif #ifdef USE_ROCM diff --git a/onnxruntime/test/contrib_ops/decoder_attention_op_test.cc b/onnxruntime/test/contrib_ops/decoder_attention_op_test.cc index 88a2bdf6a4849..8a37ef921fd2b 100644 --- a/onnxruntime/test/contrib_ops/decoder_attention_op_test.cc +++ b/onnxruntime/test/contrib_ops/decoder_attention_op_test.cc @@ -31,10 +31,8 @@ static void RunAttentionTest( const std::vector* new_value_cache = nullptr, const std::vector* key_cache = nullptr, const std::vector* value_cache = nullptr, - const std::initializer_list* key_padding_mask_data = nullptr, - bool use_float16 = false) { - int min_cuda_architecture = use_float16 ? 530 : 0; - bool enable_cuda = HasCudaEnvironment(min_cuda_architecture); + const std::initializer_list* key_padding_mask_data = nullptr) { + bool enable_cuda = HasCudaEnvironment(0); bool enable_rocm = (nullptr != DefaultRocmExecutionProvider().get()); bool enable_cpu = false; @@ -99,6 +97,7 @@ static void RunAttentionTest( tester.AddOutput("new_key_cache", output_cache_dims, *new_key_cache); tester.AddOutput("new_value_cache", output_cache_dims, *new_value_cache); } + tester.SetOutputTolerance(0.001f, 0.001f); std::vector> execution_providers; if (enable_cuda) { diff --git a/onnxruntime/test/contrib_ops/decoder_masked_multihead_attention_op_test.cc b/onnxruntime/test/contrib_ops/decoder_masked_multihead_attention_op_test.cc index acaae2dcd9712..e03e522ebbfce 100644 --- a/onnxruntime/test/contrib_ops/decoder_masked_multihead_attention_op_test.cc +++ b/onnxruntime/test/contrib_ops/decoder_masked_multihead_attention_op_test.cc @@ -754,9 +754,10 @@ TEST(DecoderMaskedSelfAttentionTest, Test_fp32) { // Output(s) tester.AddOutput("output", input_dims, output); - tester.AddOutput("present", past_dims, present); + tester.SetOutputTolerance(0.001f, 0.001f); + // Run - Regular kernel execution path { std::vector> execution_providers; @@ -897,9 +898,10 @@ TEST(DecoderMaskedSelfAttentionTest, Test_fp16) { // Output(s) tester.AddOutput("output", input_dims, output); - tester.AddOutput("present", past_dims, present); + tester.SetOutputTolerance(0.005f, 0.001f); + // Run - Regular kernel execution path { std::vector> execution_providers; diff --git a/onnxruntime/test/contrib_ops/fft_op_test.cc b/onnxruntime/test/contrib_ops/fft_op_test.cc index 56a6466c760f6..be04ee0e49911 100644 --- a/onnxruntime/test/contrib_ops/fft_op_test.cc +++ b/onnxruntime/test/contrib_ops/fft_op_test.cc @@ -25,6 +25,7 @@ TEST(ContribOpTest, Rfft) { // Target values conputed using PyTorch torch.fft.rfft(X, dim=-1, norm="backward") test.AddInput("X", {4, 4}, {0.8129f, 1.3108f, -0.8790f, -1.2046f, 0.1661f, -0.9831f, 0.5879f, 0.4918f, 1.2506f, 0.7244f, -2.6260f, -1.1268f, -1.6885f, 1.0439f, -0.2595f, 1.8780f}); test.AddOutput("Y", {4, 3, 2}, {0.0400f, 0.0000f, 1.6919f, -2.5154f, -0.1722f, 0.0000f, 0.2627f, 0.0000f, -0.4218f, 1.4748f, 1.2454f, 0.0000f, -1.7779f, 0.0000f, 3.8766f, -1.8512f, -0.9730f, 0.0000f, 0.9740f, 0.0000f, -1.4290f, 0.8341f, -4.8699f, 0.0000f}); + test.SetOutputTolerance(0.0001f, 0.0001f); test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); } @@ -45,6 +46,7 @@ TEST(ContribOpTest, Irfft) { test.AddAttribute("normalized", static_cast(0)); test.AddInput("X", {4, 3, 2}, {0.0400f, 0.0000f, 1.6919f, -2.5154f, -0.1722f, 0.0000f, 0.2627f, 0.0000f, -0.4218f, 1.4748f, 1.2454f, 0.0000f, -1.7779f, 0.0000f, 3.8766f, -1.8512f, -0.9730f, 0.0000f, 0.9740f, 0.0000f, -1.4290f, 0.8341f, -4.8699f, 0.0000f}); test.AddOutput("Y", {4, 4}, {0.8129f, 1.3108f, -0.8790f, -1.2046f, 0.1661f, -0.9831f, 0.5879f, 0.4918f, 1.2506f, 0.7244f, -2.6260f, -1.1268f, -1.6885f, 1.0439f, -0.2595f, 1.8780f}); + test.SetOutputTolerance(0.0001f, 0.0001f); test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); } } // namespace test diff --git a/onnxruntime/test/contrib_ops/greedy_search_test.cc b/onnxruntime/test/contrib_ops/greedy_search_test.cc index 1baf50c1ba616..8186529f8df45 100644 --- a/onnxruntime/test/contrib_ops/greedy_search_test.cc +++ b/onnxruntime/test/contrib_ops/greedy_search_test.cc @@ -8,6 +8,10 @@ #include "core/session/onnxruntime_cxx_api.h" #include "test/common/cuda_op_test_utils.h" +#ifdef USE_CUDA +#include "core/providers/cuda/cuda_provider_options.h" +#endif + extern std::unique_ptr ort_env; namespace onnxruntime { @@ -64,9 +68,13 @@ TEST(GreedySearchTest, GptGreedySearchFp16_VocabPadded) { if (is_cuda || is_rocm) { Ort::SessionOptions session_options; +#ifdef USE_CUDA if (is_cuda) { - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0)); + OrtCUDAProviderOptionsV2 cuda_options; + cuda_options.use_tf32 = false; + session_options.AppendExecutionProvider_CUDA_V2(cuda_options); } +#endif if (is_rocm) { Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_ROCM(session_options, 0)); } @@ -146,7 +154,9 @@ TEST(GreedySearchTest, GptGreedySearchFp32) { if (is_cuda || is_rocm) { Ort::SessionOptions session_options; if (is_cuda) { - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0)); + OrtCUDAProviderOptionsV2 cuda_options; + cuda_options.use_tf32 = false; + session_options.AppendExecutionProvider_CUDA_V2(cuda_options); } if (is_rocm) { Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_ROCM(session_options, 0)); diff --git a/onnxruntime/test/contrib_ops/gridsample_test.cc b/onnxruntime/test/contrib_ops/gridsample_test.cc index 46ed04301a9e8..9251f1b24cbcc 100644 --- a/onnxruntime/test/contrib_ops/gridsample_test.cc +++ b/onnxruntime/test/contrib_ops/gridsample_test.cc @@ -126,6 +126,7 @@ TEST(GridsampleContribOpTest, gridsample_mode_bicubic) { 0.5000f, 0.5000f, 1.0000f, 1.0000f}); test.AddAttribute("mode", "bicubic"); test.AddOutput("Y", {1, 1, 2, 4}, {-0.1406f, 0.3828f, 1.7556f, 2.9688f, 2.9688f, 1.7556f, 5.1445f, 1.3906f}); + test.SetOutputTolerance(0.0001f, 0.0001f); test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaNHWCExecutionProvider}); } diff --git a/onnxruntime/test/contrib_ops/layer_norm_op_test.cc b/onnxruntime/test/contrib_ops/layer_norm_op_test.cc index 98fb62e435f31..2b77825d80b0e 100644 --- a/onnxruntime/test/contrib_ops/layer_norm_op_test.cc +++ b/onnxruntime/test/contrib_ops/layer_norm_op_test.cc @@ -160,6 +160,7 @@ TEST(LayerNormTest, LayerNorm_Scale_Bias) { test.AddInput("gamma", {2}, {-0.6953f, 5.1824f}); test.AddInput("bias", {2}, {0.6435f, -0.3964f}); test.AddOutput("output", dims, {-0.0516f, -5.5776f, -0.0518f, -5.5788f, -0.0518f, -5.5788f}); + test.SetOutputTolerance(0.0001f, 0.0001f); test.Run(); } @@ -172,6 +173,8 @@ TEST(LayerNormTest, LayerNorm_Scale_Bias_Float16Input) { test.AddInput("gamma", {2}, {-0.6953f, 5.1824f}); test.AddInput("bias", {2}, {0.6435f, -0.3964f}); test.AddOutput("output", dims, {-0.0516f, -5.5776f, -0.0518f, -5.5788f, -0.0518f, -5.5788f}); + test.SetOutputTolerance(0.0001f, 0.0001f); + // TRT, DNNL, OpenVINO and NNAPI, CoreML don't support this combination of datatypes test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kDnnlExecutionProvider, kQnnExecutionProvider, @@ -228,6 +231,9 @@ TEST(LayerNormTest, LayerNorm17_double) { test.AddInput("x", dims, {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}); test.AddInput("gamma", {3}, {1.0, 1.0, 1.0}); test.AddOutput("output", dims, {-1.2247, 0.0, 1.2247, -1.2247, 0.0, 1.2247}); + + test.SetOutputTolerance(0.0001f, 0.0001f); + // DNNL does not support double test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kDnnlExecutionProvider}); } diff --git a/onnxruntime/test/contrib_ops/moe_test.cc b/onnxruntime/test/contrib_ops/moe_test.cc index ebb0261deefa5..45fe830425a35 100644 --- a/onnxruntime/test/contrib_ops/moe_test.cc +++ b/onnxruntime/test/contrib_ops/moe_test.cc @@ -47,6 +47,7 @@ static void RunMoETest( tester.AddInput("fc1_experts_bias", fc1_experts_bias_dims, ToFloat16(fc1_experts_bias)); tester.AddInput("fc2_experts_bias", fc2_experts_bias_dims, ToFloat16(fc2_experts_bias)); tester.AddOutput("output", output_dims, ToFloat16(output_data)); + tester.SetOutputTolerance(0.005f, 0.005f); } else { tester.AddInput("input", input_dims, input); tester.AddInput("router_probs", router_probs_dims, router_probs); @@ -55,6 +56,7 @@ static void RunMoETest( tester.AddInput("fc1_experts_bias", fc1_experts_bias_dims, fc1_experts_bias); tester.AddInput("fc2_experts_bias", fc2_experts_bias_dims, fc2_experts_bias); tester.AddOutput("output", output_dims, output_data); + tester.SetOutputTolerance(0.001f, 0.001f); } std::vector> execution_providers; diff --git a/onnxruntime/test/contrib_ops/packed_attention_op_test.cc b/onnxruntime/test/contrib_ops/packed_attention_op_test.cc index 31ef62e69bb88..09baf8def05f6 100644 --- a/onnxruntime/test/contrib_ops/packed_attention_op_test.cc +++ b/onnxruntime/test/contrib_ops/packed_attention_op_test.cc @@ -433,8 +433,7 @@ static void RunModelWithRandomInput( std::vector token_offset_dims{batch_size, sequence_length}; std::vector cum_seq_len_dims{batch_size + 1}; - // TF32 in SM >= 80 is enabled by default, need larger threshold for float when TF32 is enabled. - float gpu_threshold = is_float16 ? 0.15f : (HasCudaEnvironment(800) ? 0.05f : 0.005f); + float gpu_threshold = is_float16 ? 0.15f : 0.005f; gpu_threshold *= sequence_length > 1024 ? 4.0f : 1.0f; // threshold should increase with sequence length bool enable_cuda = HasCudaEnvironment(is_float16 ? 530 : 0); if (enable_cuda) { diff --git a/onnxruntime/test/contrib_ops/packed_multihead_attention_op_test.cc b/onnxruntime/test/contrib_ops/packed_multihead_attention_op_test.cc index 22253955566f2..9b09362db5c5f 100644 --- a/onnxruntime/test/contrib_ops/packed_multihead_attention_op_test.cc +++ b/onnxruntime/test/contrib_ops/packed_multihead_attention_op_test.cc @@ -107,6 +107,7 @@ static void RunPackedMultiHeadAttentionTest( } tester.AddOutput("output", output_dims, ToFloat16(output_data)); + tester.SetOutputTolerance(0.005f, 0.005f); } else { if (is_packed_qkv) { tester.AddInput("query", packed_qkv_dims, query_data); @@ -131,6 +132,7 @@ static void RunPackedMultiHeadAttentionTest( } tester.AddOutput("output", output_dims, output_data); + tester.SetOutputTolerance(0.001f, 0.001f); } std::vector> execution_providers; diff --git a/onnxruntime/test/contrib_ops/quantize_attention_op_test.cc b/onnxruntime/test/contrib_ops/quantize_attention_op_test.cc index fd222583ac67f..72dce2628221b 100644 --- a/onnxruntime/test/contrib_ops/quantize_attention_op_test.cc +++ b/onnxruntime/test/contrib_ops/quantize_attention_op_test.cc @@ -90,11 +90,13 @@ void RunQAttention(const std::vector& input_data, tester.AddInput("input_scale", {1}, ToFloat16({input_quant_params.scale})); tester.AddInput("weight_scale", {1}, ToFloat16({weight_quant_params.scale})); tester.AddOutput("output", output_dims, ToFloat16(output_data)); + tester.SetOutputTolerance(0.01f, 0.01f); } else { tester.AddInput("bias", bias_dims, bias_data); tester.AddInput("input_scale", {1}, {input_quant_params.scale}); tester.AddInput("weight_scale", {1}, {weight_quant_params.scale}); tester.AddOutput("output", output_dims, output_data); + tester.SetOutputTolerance(0.005f, 0.005f); } if (mask_index_data.size() > 0) { diff --git a/onnxruntime/test/contrib_ops/sampling_test.cc b/onnxruntime/test/contrib_ops/sampling_test.cc index 733bc9f01fd11..d987a1cae427d 100644 --- a/onnxruntime/test/contrib_ops/sampling_test.cc +++ b/onnxruntime/test/contrib_ops/sampling_test.cc @@ -8,6 +8,10 @@ #include "core/session/onnxruntime_cxx_api.h" #include "test/common/cuda_op_test_utils.h" +#ifdef USE_CUDA +#include "core/providers/cuda/cuda_provider_options.h" +#endif + extern std::unique_ptr ort_env; namespace onnxruntime { @@ -65,7 +69,10 @@ TEST(SamplingTest, Gpt2Sampling_GPU) { LOGS_DEFAULT(WARNING) << "Hardware NOT support current architecture"; return; } - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0)); + + OrtCUDAProviderOptionsV2 cuda_options; + cuda_options.use_tf32 = false; + session_options.AppendExecutionProvider_CUDA_V2(cuda_options); #else // USE_ROCM OrtROCMProviderOptions rocm_options; // TODO - verify the default settings diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc index 9c2c24e3c337d..db706bf929748 100644 --- a/onnxruntime/test/onnx/main.cc +++ b/onnxruntime/test/onnx/main.cc @@ -25,6 +25,10 @@ #include "core/session/onnxruntime_session_options_config_keys.h" #include "nlohmann/json.hpp" +#ifdef USE_CUDA +#include "core/providers/cuda/cuda_provider_options.h" +#endif + using namespace onnxruntime; namespace { @@ -401,12 +405,13 @@ int real_main(int argc, char* argv[], Ort::Env& env) { if (enable_tensorrt) { #ifdef USE_TENSORRT - OrtCUDAProviderOptions cuda_options; + OrtCUDAProviderOptionsV2 cuda_options; cuda_options.device_id = device_id; cuda_options.do_copy_in_default_stream = true; + cuda_options.use_tf32 = false; // TODO: Support arena configuration for users of test runner Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Tensorrt(sf, device_id)); - sf.AppendExecutionProvider_CUDA(cuda_options); + sf.AppendExecutionProvider_CUDA_V2(cuda_options); #else fprintf(stderr, "TensorRT is not supported in this build"); return -1; @@ -424,10 +429,11 @@ int real_main(int argc, char* argv[], Ort::Env& env) { } if (enable_cuda) { #ifdef USE_CUDA - OrtCUDAProviderOptions cuda_options; + OrtCUDAProviderOptionsV2 cuda_options; cuda_options.do_copy_in_default_stream = true; + cuda_options.use_tf32 = false; // TODO: Support arena configuration for users of test runner - sf.AppendExecutionProvider_CUDA(cuda_options); + sf.AppendExecutionProvider_CUDA_V2(cuda_options); #else fprintf(stderr, "CUDA is not supported in this build"); return -1; diff --git a/onnxruntime/test/providers/base_tester.cc b/onnxruntime/test/providers/base_tester.cc index e94f8c2673be3..4a62edf64bb29 100644 --- a/onnxruntime/test/providers/base_tester.cc +++ b/onnxruntime/test/providers/base_tester.cc @@ -120,6 +120,15 @@ void BaseTester::SetOutputRelErr(const char* name, float v) { it->validation_params.relative_error = optional(v); } +void BaseTester::SetOutputTolerance(float abs_error, float rel_error) { + for (auto& output : output_data_) { + if (output.def.Exists()) { + output.validation_params.absolute_error = optional(abs_error); + output.validation_params.relative_error = optional(rel_error); + } + } +} + std::vector BaseTester::GetDimsForProto(gsl::span dims) { std::vector dims_for_proto{dims.begin(), dims.end()}; if (add_symbolic_dim_to_tensor_data_ >= 0 && diff --git a/onnxruntime/test/providers/base_tester.h b/onnxruntime/test/providers/base_tester.h index 5607e58315a12..4a5df0be044b8 100644 --- a/onnxruntime/test/providers/base_tester.h +++ b/onnxruntime/test/providers/base_tester.h @@ -522,6 +522,9 @@ class BaseTester { void SetOutputAbsErr(const char* name, float v); void SetOutputRelErr(const char* name, float v); + // Set absolute and relative error for added outputs. + void SetOutputTolerance(float abs_error, float rel_error); + // Number of times to call InferenceSession::Run. The same feeds are used each time. // e.g. used to verify the generator ops behave as expected void SetNumRunCalls(int n) { diff --git a/onnxruntime/test/providers/checkers.cc b/onnxruntime/test/providers/checkers.cc index c97e6d9de4911..a32e792374cf8 100644 --- a/onnxruntime/test/providers/checkers.cc +++ b/onnxruntime/test/providers/checkers.cc @@ -20,46 +20,87 @@ struct DefaultTolerance; template <> struct DefaultTolerance { - static constexpr float absolute = 1e-6f; + static constexpr float absolute = 1e-5f; static constexpr float relative = 1e-5f; + + // Allow to have different default absolute tolerance for different providers. + static float get_absolute(const std::string& /*provider_type*/) { + return absolute; + } }; template <> struct DefaultTolerance { +#if defined(ENABLE_TRAINING) + static constexpr float absolute = 1e-4f; +#else static constexpr float absolute = 1e-5f; +#endif + static constexpr float relative = 1e-4f; + + static float get_absolute(const std::string& /*provider_type*/) { + return absolute; + } }; template <> struct DefaultTolerance { - // The thresholds are estimated with PyTorch script like the following: + // The thresholds for inference are estimated with PyTorch script like the following: // x = torch.rand(1000, 1000) // absolute = ((x + 1e-6).to(torch.float16) - x).abs().max() * 10 // x[abs(x) < absolute] = absolute // relative = ((x - x.to(torch.float16)) / x).abs().max() * 2 +#if defined(ENABLE_TRAINING) + static constexpr float absolute = 0.005f; +#else static constexpr float absolute = 0.0025f; +#endif + static constexpr float relative = 0.001f; + + static float get_absolute(const std::string& provider_type) { + if (provider_type == kDmlExecutionProvider) { + return 0.005f; + } + return absolute; + } }; template <> struct DefaultTolerance { + // The thresholds for inference are estimated with PyTorch script like the following: + // x = torch.rand(1000, 1000) + // absolute = ((x + 1e-6).to(torch.bfloat16) - x).abs().max() * 10 + // x[abs(x) < absolute] = absolute + // relative = ((x - x.to(torch.bfloat16)) / x).abs().max() * 2 static constexpr float absolute = 0.02f; static constexpr float relative = 0.01f; + + static float get_absolute(const std::string& /*provider_type*/) { + return absolute; + } +}; + +struct ToleranceParams { + float absolute; + float relative; }; template -T get_tolerance(float absolute, float relative, T expected_value) { +ToleranceParams get_tolerance_params(const ValidateOutputParams& params, const std::string& provider_type) { + ToleranceParams new_params; + new_params.absolute = params.absolute_error.has_value() ? *(params.absolute_error) : DefaultTolerance::get_absolute(provider_type); + new_params.relative = params.relative_error.has_value() ? *(params.relative_error) : DefaultTolerance::relative; + return new_params; +} + +template +T get_tolerance(const ToleranceParams& params, T expected_value) { static_assert(std::is_floating_point::value, "T must be a floating point type"); // The formula is similar to numpy.isclose: https://numpy.org/doc/stable/reference/generated/numpy.isclose.html - return static_cast(absolute) + static_cast(relative) * std::abs(expected_value); -} - -template // D is the original data type -T get_tolerance(const ValidateOutputParams& params, T expected_value) { - float absolute = (params.absolute_error.has_value() ? *(params.absolute_error) : DefaultTolerance::absolute); - float relative = (params.relative_error.has_value() ? *(params.relative_error) : DefaultTolerance::relative); - return get_tolerance(absolute, relative, expected_value); + return static_cast(params.absolute) + static_cast(params.relative) * std::abs(expected_value); } template @@ -221,11 +262,9 @@ struct TensorCheck { void operator()(const Tensor& expected, const Tensor& actual, const ValidateOutputParams& params, - const std::string& /*provider_type*/) const { + const std::string& provider_type) const { auto size = actual.Shape().Size(); - const bool has_tolerance = params.absolute_error.has_value() || params.relative_error.has_value(); - // deal with rare cases in which order of output data from a kernel MAY be // undefined Tensor expected_sorted, actual_sorted; @@ -240,10 +279,7 @@ struct TensorCheck { cur_actual = actual.Data(); } - double threshold = 0.001; -#if defined(USE_CUDA) || defined(USE_ROCM) || defined(USE_DML) - threshold = 0.005; -#endif + auto tolerance_params = get_tolerance_params(params, provider_type); for (int64_t i = 0; i < size; ++i) { // NOTE: Check isnan first to work around MSVC linker bug when /LTCG:incremental is specified. @@ -253,7 +289,7 @@ struct TensorCheck { } else if (std::isinf(cur_expected[i])) { // Test infinity for equality EXPECT_EQ(cur_expected[i], cur_actual[i]) << "Expected infinity. i:" << i; } else { - double tolerance = has_tolerance ? get_tolerance(params, cur_expected[i]) : threshold; + double tolerance = get_tolerance(tolerance_params, cur_expected[i]); EXPECT_NEAR(cur_expected[i], cur_actual[i], tolerance) << "i:" << i; } } @@ -264,9 +300,7 @@ template void InternalNumericalCheck(const Tensor& expected, const Tensor& actual, const ValidateOutputParams& params, - const std::string& /*provider_type*/) { - const bool has_tolerance = params.absolute_error.has_value() || params.relative_error.has_value(); - + const std::string& provider_type) { // deal with rare cases in which order of output data from a kernel MAY be // undefined Tensor expected_sorted, actual_sorted; @@ -282,11 +316,7 @@ void InternalNumericalCheck(const Tensor& expected, cur_actual = actual.Data(); } -#if defined(USE_CUDA) || defined(USE_ROCM) || defined(USE_DML) - constexpr float threshold = 0.005f; -#else - constexpr float threshold = 0.0001f; -#endif + auto tolerance_params = get_tolerance_params(params, provider_type); for (int64_t i = 0; i < size; ++i) { // NOTE: Check isnan first to work around MSVC linker bug when /LTCG:incremental is specified. @@ -296,7 +326,7 @@ void InternalNumericalCheck(const Tensor& expected, } else if (std::isinf(cur_expected[i])) { // Test infinity for equality EXPECT_EQ(cur_expected[i], cur_actual[i]) << "Expected infinity. i:" << i; } else { - T tolerance = has_tolerance ? get_tolerance(params, cur_expected[i]) : threshold; + T tolerance = get_tolerance(tolerance_params, cur_expected[i]); EXPECT_NEAR(cur_expected[i], cur_actual[i], tolerance) << "i:" << i; } } @@ -317,7 +347,7 @@ struct TensorCheck { void operator()(const Tensor& expected, const Tensor& actual, const ValidateOutputParams& params, - const std::string& /*provider_type*/) const { + const std::string& provider_type) const { auto* cur_expected = expected.Data(); auto* cur_actual = actual.Data(); auto size = actual.Shape().Size(); @@ -333,21 +363,15 @@ struct TensorCheck { sort_expected_and_actual_buffers(f_expected, f_actual); } - const bool has_tolerance = params.absolute_error.has_value() || params.relative_error.has_value(); + auto tolerance_params = get_tolerance_params(params, provider_type); - float threshold = 0.001f; -#if defined(USE_TENSORRT) || defined(ENABLE_TRAINING_CORE) || defined(USE_CUDA) || defined(USE_ROCM) - threshold = 0.005f; -#elif defined(USE_DML) - threshold = 0.02f; -#endif for (int64_t i = 0; i < size; ++i) { if (std::isnan(f_expected[i])) { EXPECT_TRUE(std::isnan(f_expected[i])) << "Expected NaN. i:" << i; } else if (std::isinf(f_expected[i])) { // Test infinity for equality EXPECT_EQ(f_expected[i], f_actual[i]) << "Expected infinity. i:" << i; } else { - float tolerance = has_tolerance ? get_tolerance(params, f_expected[i]) : threshold; + float tolerance = get_tolerance(tolerance_params, f_expected[i]); EXPECT_NEAR(f_expected[i], f_actual[i], tolerance) << "i:" << i; } } @@ -359,7 +383,7 @@ struct TensorCheck { void operator()(const Tensor& expected, const Tensor& actual, const ValidateOutputParams& params, - const std::string& /*provider_type*/) const { + const std::string& provider_type) const { auto* cur_expected = expected.Data(); auto* cur_actual = actual.Data(); auto size = actual.Shape().Size(); @@ -375,13 +399,7 @@ struct TensorCheck { sort_expected_and_actual_buffers(f_expected, f_actual); } - const bool has_tolerance = params.absolute_error.has_value() || params.relative_error.has_value(); - - float abs_threshold = 0.0001f; - float rel_threshold = 0.001f; -#if defined(USE_TENSORRT) || defined(ENABLE_TRAINING_CORE) || defined(USE_CUDA) || defined(USE_ROCM) || defined(USE_DML) || defined(USE_DNNL) - rel_threshold = 0.05f; // expect at least 95% close -#endif + auto tolerance_params = get_tolerance_params(params, provider_type); for (int64_t i = 0; i < size; ++i) { if (std::isnan(f_expected[i])) { @@ -389,9 +407,7 @@ struct TensorCheck { } else if (std::isinf(f_expected[i])) { // Test infinity for equality EXPECT_EQ(f_expected[i], f_actual[i]) << "Expected infinity. i:" << i; } else { - float tolerance = has_tolerance - ? get_tolerance(params, f_expected[i]) - : get_tolerance(abs_threshold, rel_threshold, f_expected[i]); + float tolerance = get_tolerance(tolerance_params, f_expected[i]); EXPECT_NEAR(f_expected[i], f_actual[i], tolerance) << "i:" << i; } } diff --git a/onnxruntime/test/providers/cpu/activation/activation_op_test.h b/onnxruntime/test/providers/cpu/activation/activation_op_test.h index 984b8f4437a3b..a0a0c28a0e26a 100644 --- a/onnxruntime/test/providers/cpu/activation/activation_op_test.h +++ b/onnxruntime/test/providers/cpu/activation/activation_op_test.h @@ -69,6 +69,11 @@ inline void TestActivationOp(const char* szOp, const std::vector> test.SetOutputRelErr("Y", .000001f); } #endif + + if (strcmp(szOp, "QuickGelu") == 0) { + test.SetOutputTolerance(0.0001f, 0.0001f); + } + test.Run(OpTester::ExpectResult::kExpectSuccess, "", excluded_providers); } } diff --git a/onnxruntime/test/providers/cpu/math/einsum_test.cc b/onnxruntime/test/providers/cpu/math/einsum_test.cc index 4e968d3de6b8a..d9f7ab6ea50fc 100644 --- a/onnxruntime/test/providers/cpu/math/einsum_test.cc +++ b/onnxruntime/test/providers/cpu/math/einsum_test.cc @@ -10,6 +10,11 @@ namespace onnxruntime { namespace test { +// Exclude TRT EP in many tests due to Segmentation fault in A100 +static void run_excluding_trt(OpTester& test) { + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + // Tests are split up "theme-wise" (i.e.) each kind of operation Einsum can be used for // Within each theme we test "explicit" and "implicit" versions of the Einsum equation (wherever possible) // Some operations are not possible with implicit notation (reordering, reduction, etc.) @@ -50,7 +55,7 @@ TEST(Einsum, ExplicitEinsumAsTransposeOp_2D_input_With_Broadcasting) { test.AddAttribute("equation", "...i->i..."); test.AddInput("x", {2, 2}, {1.f, 2.f, 3.f, 4.f}); test.AddOutput("y", {2, 2}, {1.f, 3.f, 2.f, 4.f}); - test.Run(); + run_excluding_trt(test); } TEST(Einsum, ExplicitEinsumAsBatchedTransposeOp_3D_input) { @@ -58,7 +63,7 @@ TEST(Einsum, ExplicitEinsumAsBatchedTransposeOp_3D_input) { test.AddAttribute("equation", "...ji->...ij"); test.AddInput("x", {2, 2, 2}, {1.f, 2.f, 3.f, 4.f, 1.f, 2.f, 3.f, 4.f}); test.AddOutput("y", {2, 2, 2}, {1.f, 3.f, 2.f, 4.f, 1.f, 3.f, 2.f, 4.f}); - test.Run(); + run_excluding_trt(test); } // Implicit @@ -75,7 +80,7 @@ TEST(Einsum, ImplicitEinsumAsBatchedTransposeOp_3D_input) { test.AddAttribute("equation", "...ji"); test.AddInput("x", {2, 2, 2}, {1.f, 2.f, 3.f, 4.f, 1.f, 2.f, 3.f, 4.f}); test.AddOutput("y", {2, 2, 2}, {1.f, 3.f, 2.f, 4.f, 1.f, 3.f, 2.f, 4.f}); - test.Run(); + run_excluding_trt(test); } // Theme: Axis/Axes reduction @@ -102,7 +107,7 @@ TEST(Einsum, ExplicitEinsumAsBatchedReduceOp_3D_input_0) { test.AddAttribute("equation", "...ji->...j"); test.AddInput("x", {2, 2, 2}, {1.f, 2.f, 3.f, 4.f, 1.f, 2.f, 3.f, 4.f}); test.AddOutput("y", {2, 2}, {3.f, 7.f, 3.f, 7.f}); - test.Run(); + run_excluding_trt(test); } TEST(Einsum, ExplicitEinsumAsBatchedReduceOp_3D_input_1) { @@ -110,7 +115,7 @@ TEST(Einsum, ExplicitEinsumAsBatchedReduceOp_3D_input_1) { test.AddAttribute("equation", "...ji->..."); test.AddInput("x", {2, 2, 2}, {1.f, 2.f, 3.f, 4.f, 1.f, 2.f, 3.f, 4.f}); test.AddOutput("y", {2}, {10.f, 10.f}); - test.Run(); + run_excluding_trt(test); } // Implicit @@ -144,7 +149,7 @@ TEST(Einsum, ExplicitEinsumAsOuterProductWithTransposeOp_Multi_Input) { test.AddInput("y", {2}, {3.f, 4.f}); test.AddInput("z", {2}, {5.f, 6.f}); test.AddOutput("o", {2, 2, 2}, {15.f, 18.f, 30.f, 36.f, 20.f, 24.f, 40.f, 48.f}); - test.Run(); + run_excluding_trt(test); } // Implicit @@ -155,7 +160,7 @@ TEST(Einsum, ImplicitEinsumAsOuterProductOp_2D_input) { test.AddInput("y", {2}, {3.f, 4.f}); test.AddInput("z", {2}, {5.f, 6.f}); test.AddOutput("o", {2, 2, 2}, {15.f, 18.f, 20.f, 24.f, 30.f, 36.f, 40.f, 48.f}); - test.Run(); + run_excluding_trt(test); } TEST(Einsum, ImplicitEinsumAsOuterProductOp_Multi_Input) { @@ -165,7 +170,7 @@ TEST(Einsum, ImplicitEinsumAsOuterProductOp_Multi_Input) { test.AddInput("y", {2}, {3.f, 4.f}); test.AddInput("z", {2}, {5.f, 6.f}); test.AddOutput("o", {2, 2, 2}, {15.f, 18.f, 20.f, 24.f, 30.f, 36.f, 40.f, 48.f}); - test.Run(); + run_excluding_trt(test); } // Theme: MatMul @@ -233,7 +238,7 @@ TEST(Einsum, ExplicitEinsumAsMatmul_Multi_Input) { test.AddInput("y", {2, 2}, {1.f, 2.f, 3.f, 4.f}); test.AddInput("z", {2, 2}, {1.f, 2.f, 3.f, 4.f}); test.AddOutput("o", {2, 2}, {37.f, 81.f, 54.f, 118.f}); - test.Run(); + run_excluding_trt(test); } TEST(Einsum, ExplicitEinsumAsBatchedMatmul) { @@ -251,7 +256,7 @@ TEST(Einsum, ExplicitEinsumAsBatchedMatmulWithBroadcasting_0) { test.AddInput("x", {2, 2, 2}, {1.f, 2.f, 3.f, 4.f, 1.f, 2.f, 3.f, 4.f}); test.AddInput("y", {2, 2, 2}, {1.f, 2.f, 3.f, 4.f, 1.f, 2.f, 3.f, 4.f}); test.AddOutput("o", {2, 2, 2}, {7.f, 10.f, 15.f, 22.f, 7.f, 10.f, 15.f, 22.f}); - test.Run(); + run_excluding_trt(test); } TEST(Einsum, ExplicitEinsumAsBatchedMatmulWithBroadcasting_1) { @@ -260,7 +265,7 @@ TEST(Einsum, ExplicitEinsumAsBatchedMatmulWithBroadcasting_1) { test.AddInput("x", {2, 2, 2}, {1.f, 2.f, 3.f, 4.f, 1.f, 2.f, 3.f, 4.f}); test.AddInput("y", {2, 2, 2}, {1.f, 2.f, 3.f, 4.f, 1.f, 2.f, 3.f, 4.f}); test.AddOutput("o", {2, 2, 2}, {14.f, 20.f, 30.f, 44.f, 14.f, 20.f, 30.f, 44.f}); - test.Run(); + run_excluding_trt(test); } TEST(Einsum, ExplicitEinsumAsMatmul_OutputTransposed) { @@ -303,7 +308,7 @@ TEST(Einsum, ImplicitEinsumAsMatmul_Multi_Input) { test.AddInput("y", {2, 2}, {1.f, 2.f, 3.f, 4.f}); test.AddInput("z", {2, 2}, {1.f, 2.f, 3.f, 4.f}); test.AddOutput("o", {2, 2}, {37.f, 54.f, 81.f, 118.f}); - test.Run(); + run_excluding_trt(test); } TEST(Einsum, ImplicitEinsumAsBatchedMatmul) { OpTester test("Einsum", 12, onnxruntime::kOnnxDomain); @@ -320,7 +325,7 @@ TEST(Einsum, ImplicitEinsumAsBatchedMatmulWithBroadcasting_0) { test.AddInput("x", {2, 2, 2}, {1.f, 2.f, 3.f, 4.f, 1.f, 2.f, 3.f, 4.f}); test.AddInput("y", {2, 2, 2}, {1.f, 2.f, 3.f, 4.f, 1.f, 2.f, 3.f, 4.f}); test.AddOutput("o", {2, 2, 2}, {7.f, 10.f, 15.f, 22.f, 7.f, 10.f, 15.f, 22.f}); - test.Run(); + run_excluding_trt(test); } TEST(Einsum, ImplicitEinsumAsMatmul_2) { @@ -343,7 +348,7 @@ TEST(Einsum, DiagonalWithMatmul) { test.AddInput("x", {2, 2, 3}, {1.f, 2.f, 3.f, 1.f, 2.f, 3.f, 1.f, 2.f, 3.f, 1.f, 2.f, 3.f}); test.AddInput("y", {3, 3}, {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f}); test.AddOutput("o", {3}, {60.f, 72.f, 84.f}); - test.Run(); + run_excluding_trt(test); } // Theme: Diagonal parsing @@ -354,7 +359,7 @@ TEST(Einsum, ExplicitEinsumAsDiagonalOp) { test.AddAttribute("equation", "ii->i"); test.AddInput("x", {2, 2}, {1.f, 2.f, 3.f, 4.f}); test.AddOutput("o", {2}, {1.f, 4.f}); - test.Run(); + run_excluding_trt(test); } TEST(Einsum, ExplicitEinsumAsDiagonalOp_1) { @@ -362,7 +367,7 @@ TEST(Einsum, ExplicitEinsumAsDiagonalOp_1) { test.AddAttribute("equation", "iii->i"); test.AddInput("x", {2, 2, 2}, {1.f, 2.f, 3.f, 4.f, 1.f, 2.f, 3.f, 4.f}); test.AddOutput("o", {2}, {1.f, 4.f}); - test.Run(); + run_excluding_trt(test); } TEST(Einsum, ExplicitEinsumAsDiagonalOpWithAxisReduced) { @@ -370,7 +375,7 @@ TEST(Einsum, ExplicitEinsumAsDiagonalOpWithAxisReduced) { test.AddAttribute("equation", "iji->j"); test.AddInput("x", {2, 2, 2}, {1.f, 2.f, 3.f, 4.f, 1.f, 2.f, 3.f, 4.f}); test.AddOutput("o", {2}, {3.f, 7.f}); - test.Run(); + run_excluding_trt(test); } TEST(Einsum, ExplicitEinsumAsDiagonalOpWithAxisPreserved) { @@ -378,7 +383,7 @@ TEST(Einsum, ExplicitEinsumAsDiagonalOpWithAxisPreserved) { test.AddAttribute("equation", "iji->ij"); test.AddInput("x", {2, 2, 2}, {1.f, 2.f, 3.f, 4.f, 1.f, 2.f, 3.f, 4.f}); test.AddOutput("o", {2, 2}, {1.f, 3.f, 2.f, 4.f}); - test.Run(); + run_excluding_trt(test); } TEST(Einsum, ExplicitEinsumAsDiagonalOpWithTranspose) { @@ -386,7 +391,7 @@ TEST(Einsum, ExplicitEinsumAsDiagonalOpWithTranspose) { test.AddAttribute("equation", "iji->ji"); test.AddInput("x", {2, 2, 2}, {1.f, 2.f, 3.f, 4.f, 1.f, 2.f, 3.f, 4.f}); test.AddOutput("o", {2, 2}, {1.f, 2.f, 3.f, 4.f}); - test.Run(); + run_excluding_trt(test); } // ROCm doesn't support double @@ -396,7 +401,7 @@ TEST(Einsum, ExplicitEinsumAsDiagonalOpWithTranspose_double) { test.AddAttribute("equation", "iji->ji"); test.AddInput("x", {2, 2, 2}, {1., 2., 3., 4., 1., 2., 3., 4.}); test.AddOutput("o", {2, 2}, {1., 2., 3., 4.}); - test.Run(); + run_excluding_trt(test); } #endif @@ -405,7 +410,7 @@ TEST(Einsum, ExplicitEinsumAsDiagonalOpWithTranspose_int32) { test.AddAttribute("equation", "iji->ji"); test.AddInput("x", {2, 2, 2}, {1, 2, 3, 4, 1, 2, 3, 4}); test.AddOutput("o", {2, 2}, {1, 2, 3, 4}); - test.Run(); + run_excluding_trt(test); } TEST(Einsum, ExplicitEinsumAsDiagonalOpWithTranspose_int64) { @@ -413,14 +418,14 @@ TEST(Einsum, ExplicitEinsumAsDiagonalOpWithTranspose_int64) { test.AddAttribute("equation", "iji->ji"); test.AddInput("x", {2, 2, 2}, {1, 2, 3, 4, 1, 2, 3, 4}); test.AddOutput("o", {2, 2}, {1, 2, 3, 4}); - test.Run(); + run_excluding_trt(test); } TEST(Einsum, ExplicitEinsumAsBatchedDiagonalOp) { OpTester test("Einsum", 12, onnxruntime::kOnnxDomain); test.AddAttribute("equation", "...ii->...i"); test.AddInput("x", {3, 2, 2}, {1.f, 2.f, 3.f, 4.f, 1.f, 2.f, 3.f, 4.f, 1.f, 2.f, 3.f, 4.f}); test.AddOutput("o", {3, 2}, {1.f, 4.f, 1.f, 4.f, 1.f, 4.f}); - test.Run(); + run_excluding_trt(test); } TEST(Einsum, ExplicitEinsumAsBatchedDiagonalOp_1) { @@ -428,7 +433,7 @@ TEST(Einsum, ExplicitEinsumAsBatchedDiagonalOp_1) { test.AddAttribute("equation", "...iij->...j"); test.AddInput("x", {2, 2, 2, 2}, {1.f, 2.f, 3.f, 4.f, 1.f, 2.f, 3.f, 4.f, 1.f, 2.f, 3.f, 4.f, 1.f, 2.f, 3.f, 4.f}); test.AddOutput("o", {2, 2}, {4.f, 6.f, 4.f, 6.f}); - test.Run(); + run_excluding_trt(test); } // Implicit (Implicit diagonal ops will sum up diagonal values) @@ -442,7 +447,7 @@ TEST(Einsum, ImplicitEinsumAsDiagonalOp) { test.AddAttribute("equation", "ii"); test.AddInput("x", {2, 2}, {1.f, 2.f, 3.f, 4.f}); test.AddOutput("o", {}, {5.f}); - test.Run(); + run_excluding_trt(test); } TEST(Einsum, ImplicitEinsumAsDiagonalOp_1) { @@ -455,7 +460,7 @@ TEST(Einsum, ImplicitEinsumAsDiagonalOp_1) { test.AddAttribute("equation", "iii"); test.AddInput("x", {2, 2, 2}, {1.f, 2.f, 3.f, 4.f, 1.f, 2.f, 3.f, 4.f}); test.AddOutput("o", {}, {5.f}); - test.Run(); + run_excluding_trt(test); } TEST(Einsum, ImplicitEinsumAsDiagonalOpWithAxisReduced) { @@ -463,7 +468,7 @@ TEST(Einsum, ImplicitEinsumAsDiagonalOpWithAxisReduced) { test.AddAttribute("equation", "iji"); test.AddInput("x", {2, 2, 2}, {1.f, 2.f, 3.f, 4.f, 1.f, 2.f, 3.f, 4.f}); test.AddOutput("o", {2}, {3.f, 7.f}); - test.Run(); + run_excluding_trt(test); } TEST(Einsum, ImplicitEinsumAsBatchedDiagonalOp) { @@ -471,7 +476,7 @@ TEST(Einsum, ImplicitEinsumAsBatchedDiagonalOp) { test.AddAttribute("equation", "...ii"); test.AddInput("x", {2, 1, 2, 2}, {1.f, 2.f, 3.f, 4.f, 1.f, 2.f, 3.f, 4.f}); test.AddOutput("o", {2, 1}, {5.f, 5.f}); - test.Run(); + run_excluding_trt(test); } TEST(Einsum, ImplicitEinsumAsBatchedDiagonalOp_1) { @@ -479,7 +484,7 @@ TEST(Einsum, ImplicitEinsumAsBatchedDiagonalOp_1) { test.AddAttribute("equation", "...iij"); test.AddInput("x", {2, 2, 2, 2}, {1.f, 2.f, 3.f, 4.f, 1.f, 2.f, 3.f, 4.f, 1.f, 2.f, 3.f, 4.f, 1.f, 2.f, 3.f, 4.f}); test.AddOutput("o", {2, 2}, {4.f, 6.f, 4.f, 6.f}); - test.Run(); + run_excluding_trt(test); } // Theme: Scalar inputs and outputs @@ -491,7 +496,7 @@ TEST(Einsum, ExplicitEinsumAsElementwiseMulOpWithOneScalar) { test.AddInput("x", {}, {10.f}); test.AddInput("y", {2, 2}, {1.f, 2.f, 3.f, 4.f}); test.AddOutput("o", {2, 2}, {10.f, 20.f, 30.f, 40.f}); - test.Run(); + run_excluding_trt(test); } TEST(Einsum, ExplicitEinsumAsElementwiseMulOpWithTwoScalars_Multi_Input) { @@ -501,7 +506,7 @@ TEST(Einsum, ExplicitEinsumAsElementwiseMulOpWithTwoScalars_Multi_Input) { test.AddInput("y", {2, 2}, {1.f, 2.f, 3.f, 4.f}); test.AddInput("z", {}, {10.f}); test.AddOutput("o", {2, 2}, {100.f, 200.f, 300.f, 400.f}); - test.Run(); + run_excluding_trt(test); } TEST(Einsum, ExplicitEinsumAsElementwiseMulOpWithAllScalars) { OpTester test("Einsum", 12, onnxruntime::kOnnxDomain); @@ -527,7 +532,7 @@ TEST(Einsum, ImplicitEinsumAsElementwiseMulOpWithOneScalar) { test.AddInput("x", {}, {10.f}); test.AddInput("y", {2, 2}, {1.f, 2.f, 3.f, 4.f}); test.AddOutput("o", {2, 2}, {10.f, 20.f, 30.f, 40.f}); - test.Run(); + run_excluding_trt(test); } TEST(Einsum, ImplicitEinsumAsElementwiseMulOpWithThreeScalars_Multi_Input) { @@ -538,7 +543,7 @@ TEST(Einsum, ImplicitEinsumAsElementwiseMulOpWithThreeScalars_Multi_Input) { test.AddInput("c", {}, {10.f}); test.AddInput("d", {}, {10.f}); test.AddOutput("o", {2, 2}, {1000.f, 2000.f, 3000.f, 4000.f}); - test.Run(); + run_excluding_trt(test); } TEST(Einsum, ImplicitEinsumAsElementwiseMulOpWithAllScalars) { OpTester test("Einsum", 12, onnxruntime::kOnnxDomain); @@ -568,7 +573,7 @@ TEST(Einsum, ExplicitEinsumAsTensorContractionReshapeFinal) { test.AddInput("y", {2, 2}, {1.f, 2.f, -6.f, 2.f}); test.AddInput("z", {2, 2}, {3.f, 4.f, 5.f, 6.f}); test.AddOutput("o", {2, 2, 2}, {63.f, -132.f, 63.f, -132.f, 63.f, -132.f, 63.f, -132.f}); - test.Run(); + run_excluding_trt(test); } TEST(Einsum, ExplicitEinsumAsTensorContractionReshapeLeft) { @@ -720,7 +725,7 @@ TEST(Einsum, ExplicitEinsumAsDiagonalOp_Half) { ConvertFloatToMLFloat16(output_f.data(), output.data(), 2); test.AddInput("x", {2, 2}, input_x); test.AddOutput("o", {2}, output); - test.Run(); + run_excluding_trt(test); } TEST(Einsum, ExplicitEinsumAsElementwiseMulOpWithOneScalar_Half) { @@ -741,7 +746,7 @@ TEST(Einsum, ExplicitEinsumAsElementwiseMulOpWithOneScalar_Half) { test.AddInput("x", {}, input_x); test.AddInput("y", {2, 2}, input_y); test.AddOutput("o", {2, 2}, output); - test.Run(); + run_excluding_trt(test); } TEST(Einsum, ExplicitEinsumAsTensorContraction_Half) { @@ -2093,7 +2098,7 @@ TEST_P(EinsumTransposeMatMulThreeInputsTest, EinsumTransposeMatMulThreeInputsTes std::vector v1(tst.shape.begin(), tst.shape.end()); std::vector v2(tst.expected.begin(), tst.expected.end()); test.AddOutput("o", v1, v2); - test.Run(); + run_excluding_trt(test); } INSTANTIATE_TEST_SUITE_P(EinsumTransposeMatMulThreeInputsTests, EinsumTransposeMatMulThreeInputsTest, testing::ValuesIn(case1)); diff --git a/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc b/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc index d35e5c78cfd69..090022d9e68c9 100644 --- a/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc +++ b/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc @@ -1370,7 +1370,8 @@ static void TestSumMultipleInputsNoBroadcasting(size_t num_inputs, const TensorS test.AddOutput("sum", dims, expected_output_data); - test.Run(); + // TensorRT EP Segmentation fault: https://github.com/microsoft/onnxruntime/issues/19530 + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); } TEST(MathOpTest, SumMultipleInputsNoBroadcasting) { @@ -2630,7 +2631,7 @@ TEST(MathOpTest, Mean_8) { #endif template -void TrigFloatTest(OpTester& test, std::initializer_list input) { +void TrigFloatTest(OpTester& test, std::initializer_list input, float tolerance = -1.0f) { std::vector dims{static_cast(input.size())}; std::vector output; @@ -2639,6 +2640,12 @@ void TrigFloatTest(OpTester& test, std::initializer_list input) { test.AddInput("X", dims, input); test.AddOutput("Y", dims, output); + + if (tolerance > 0.0f) { + test.SetOutputAbsErr("Y", tolerance); + test.SetOutputRelErr("Y", tolerance); + } + test.Run(); } @@ -2708,6 +2715,7 @@ TEST(MathOpTest, CosFloat16) { TrigFloat16Test<::cosf>(test, {1.1f, -1.1f, 2.2f, -2.2f}); } } + TEST(MathOpTest, Tan) { OpTester test("Tan"); TrigFloatTest<::tanf>(test, {-100.0f, -50.0f, 0.0f, 50.0f, 100.0f}); @@ -2715,7 +2723,8 @@ TEST(MathOpTest, Tan) { TEST(MathOpTest, Asin) { OpTester test("Asin"); - TrigFloatTest<::asinf>(test, {-1.0f, -0.5f, 0.0f, 0.5f, 1.0f}); + float tolerance = DefaultDmlExecutionProvider().get() != nullptr ? 0.0001f : -1.0f; + TrigFloatTest<::asinf>(test, {-1.0f, -0.5f, 0.0f, 0.5f, 1.0f}, tolerance); } TEST(MathOpTest, Acos) { diff --git a/onnxruntime/test/providers/cpu/math/logsoftmax_test.cc b/onnxruntime/test/providers/cpu/math/logsoftmax_test.cc index 273503e7bf6af..d7befe7820407 100644 --- a/onnxruntime/test/providers/cpu/math/logsoftmax_test.cc +++ b/onnxruntime/test/providers/cpu/math/logsoftmax_test.cc @@ -15,7 +15,8 @@ static void RunTest(const std::vector& x_vals, int64_t axis = 1, bool is_tensorrt_supported = true, OpTester::ExpectResult expect_result = OpTester::ExpectResult::kExpectSuccess, - const std::string& error_msg = "") { + const std::string& error_msg = "", + float tolerance = 0.0f) { OpTester tester("LogSoftmax", opset); if (opset < 13) { @@ -31,6 +32,11 @@ static void RunTest(const std::vector& x_vals, tester.AddInput("X", dimensions, x_vals); tester.AddOutput("Y", dimensions, expected_vals); + if (tolerance != 0.0f) { + tester.SetOutputAbsErr("Y", tolerance); + tester.SetOutputRelErr("Y", tolerance); + } + std::unordered_set excluded_providers; if (!is_tensorrt_supported) { excluded_providers.insert(kTensorrtExecutionProvider); @@ -62,7 +68,7 @@ TEST(LogSoftmaxOperator, LargeNumber) { -3.4401896f, -2.4401896f, -1.44018972f, -0.44018969f}; std::vector dimensions = {2, 4}; - RunTest(x_vals, expected_vals, dimensions); + RunTest(x_vals, expected_vals, dimensions, 7, 1, true, OpTester::ExpectResult::kExpectSuccess, "", 0.0005f); } // np.random.seed(123) # Use a seed so we can replicate the input and expected values here and in python diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc index af71fe5cf79ae..58df7763786b2 100644 --- a/onnxruntime/test/providers/cpu/model_tests.cc +++ b/onnxruntime/test/providers/cpu/model_tests.cc @@ -42,6 +42,10 @@ #include "core/providers/armnn/armnn_provider_factory.h" #endif +#ifdef USE_CUDA +#include "core/providers/cuda/cuda_provider_options.h" +#endif + #include "test/common/cuda_op_test_utils.h" // test infrastructure @@ -98,21 +102,6 @@ TEST_P(ModelTest, Run) { std::unique_ptr model_info = std::make_unique(model_path.c_str()); -#if defined(__linux__) - // ORT enables TF32 in GEMM for A100. TF32 will cause precsion loss and fail this test. - if (HasCudaEnvironment(800) && provider_name == "cuda") { - per_sample_tolerance = 1e-1; - if (model_path.find(ORT_TSTR("SSD")) > 0 || - model_path.find(ORT_TSTR("ssd")) > 0 || - model_path.find(ORT_TSTR("yolov3")) > 0 || - model_path.find(ORT_TSTR("mask_rcnn")) > 0 || - model_path.find(ORT_TSTR("FNS")) > 0) { - SkipTest("Skipping SSD test for big tolearance failure or other errors"); - return; - } - } -#endif - if (model_info->HasDomain(ONNX_NAMESPACE::AI_ONNX_TRAINING_DOMAIN) || model_info->HasDomain(ONNX_NAMESPACE::AI_ONNX_PREVIEW_TRAINING_DOMAIN)) { SkipTest("it has the training domain. No pipeline should need to run these tests."); @@ -198,6 +187,7 @@ TEST_P(ModelTest, Run) { std::string device_id = Env::Default().GetEnvironmentVar("ONNXRUNTIME_TEST_GPU_DEVICE_ID"); values.push_back(device_id.empty() ? "0" : device_id.c_str()); ASSERT_ORT_STATUS_OK(OrtApis::UpdateCUDAProviderOptions(cuda_options, keys.data(), values.data(), 1)); + cuda_options->use_tf32 = false; ortso.AppendExecutionProvider_CUDA_V2(*cuda_options); } else if (provider_name == "rocm") { OrtROCMProviderOptions ep_options; @@ -229,6 +219,7 @@ TEST_P(ModelTest, Run) { ASSERT_ORT_STATUS_OK(OrtApis::CreateCUDAProviderOptions(&cuda_options)); std::unique_ptr rel_cuda_options( cuda_options, &OrtApis::ReleaseCUDAProviderOptions); + cuda_options->use_tf32 = false; ortso.AppendExecutionProvider_CUDA_V2(*cuda_options); } else if (provider_name == "migraphx") { OrtMIGraphXProviderOptions ep_options; diff --git a/onnxruntime/test/providers/cpu/nn/batch_norm_op_test.cc b/onnxruntime/test/providers/cpu/nn/batch_norm_op_test.cc index 3d30fc62a945d..76df28399564c 100644 --- a/onnxruntime/test/providers/cpu/nn/batch_norm_op_test.cc +++ b/onnxruntime/test/providers/cpu/nn/batch_norm_op_test.cc @@ -905,14 +905,16 @@ TEST(BatchNormTest, ForwardTrainingTestWithSavedOutputsOpset9) { test.AddInput("var", channel_dims, {1.0f, 2.0f}); test.AddOutput("Y", input_output_dims, {0.0131f, 0.5210f, 1.7244f, 0.1387f, -0.2708f, -0.1191f, 1.2089f, -0.0922f, -0.9548f, -1.5203f, 0.9077f, -0.8298f, 0.5796f, -0.4501f, -2.0921f, 1.2358f}); - test.AddOutput("running_mean", channel_dims, {-0.1754f, 0.303106f}); test.AddOutput("running_var", channel_dims, {0.696052f, 1.41316f}); + // mean and variance of X across channel dimension // With Opset9 we output saved_inv_std instead of saved_var to match CUDA EP test.AddOutput("saved_mean", channel_dims, {-0.306f, 0.114562f}); test.AddOutput("saved_inv_std", channel_dims, {1.2288f, 0.861317f}); + test.SetOutputTolerance(0.0001f, 0.0001f); + // exclude CUDA Execution Provider due to flakiness // exclude TRT and OpenVINO for same reasons as seen in TestBatchNorm() test.Run(OpTester::ExpectResult::kExpectSuccess, "", @@ -938,10 +940,11 @@ TEST(BatchNormTest, ForwardTrainingTestOpset14) { test.AddInput("var", channel_dims, {1.0f, 2.0f}); test.AddOutput("Y", input_output_dims, {0.0131f, 0.5210f, 1.7244f, 0.1387f, -0.2708f, -0.1191f, 1.2089f, -0.0922f, -0.9548f, -1.5203f, 0.9077f, -0.8298f, 0.5796f, -0.4501f, -2.0921f, 1.2358f}); - test.AddOutput("running_mean", channel_dims, {-0.1754f, 0.303106f}); test.AddOutput("running_var", channel_dims, {0.696052f, 1.41316f}); + test.SetOutputTolerance(0.0001f, 0.0001f); + // exclude CUDA Execution Provider due to flakiness // exclude TRT and OpenVINO for same reasons as seen in TestBatchNorm() test.Run(OpTester::ExpectResult::kExpectSuccess, "", @@ -970,6 +973,8 @@ TEST(BatchNormTest, ForwardTrainingTestOpset15) { test.AddOutput("running_mean", channel_dims, {-0.1754f, 0.303106f}); test.AddOutput("running_var", channel_dims, {0.696052f, 1.41316f}); + test.SetOutputTolerance(0.0001f, 0.0001f); + // Same exclusions as the opset 14 test test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kCudaNHWCExecutionProvider, kRocmExecutionProvider, diff --git a/onnxruntime/test/providers/cpu/nn/pool_op_test.cc b/onnxruntime/test/providers/cpu/nn/pool_op_test.cc index e24cda17166ed..b59d4492064a4 100644 --- a/onnxruntime/test/providers/cpu/nn/pool_op_test.cc +++ b/onnxruntime/test/providers/cpu/nn/pool_op_test.cc @@ -888,6 +888,7 @@ TEST(PoolTest, AveragePool_IncludePadPixel) { test.AddInput("X", x_dims, x_vals); test.AddOutput("Y", expected_dims, expected_vals); + test.SetOutputTolerance(0.0001f, 0.0001f); test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaNHWCExecutionProvider, kTensorrtExecutionProvider}); } diff --git a/onnxruntime/test/providers/cpu/object_detection/roialign_test.cc b/onnxruntime/test/providers/cpu/object_detection/roialign_test.cc index 2f97f6e71e92b..12976c040cdf7 100644 --- a/onnxruntime/test/providers/cpu/object_detection/roialign_test.cc +++ b/onnxruntime/test/providers/cpu/object_detection/roialign_test.cc @@ -463,6 +463,7 @@ static void BasicTest() { 0.3661f, 0.2349f, }); + test.SetOutputTolerance(0.0001f, 0.0001f); test.Run(); } @@ -713,7 +714,8 @@ TEST(RoiAlignTest, AvgModeNegativeInvalidMode) { test.AddInput("batch_indices", {5}, {0, 0, 0, 0, 0}); test.AddOutput("Y", {5, 3, 3, 4}, {2.95833f, 3.20833f, 3.45833f, 3.70833f, 4.625f, 4.875f, 5.125f, 5.375f, 6.29167f, 6.54167f, 6.79167f, 7.04167f, 27.9583f, 28.2083f, 28.4583f, 28.7083f, 29.625f, 29.875f, 30.125f, 30.375f, 31.2917f, 31.5417f, 31.7917f, 32.0417f, 52.9583f, 53.2083f, 53.4583f, 53.7083f, 54.625f, 54.875f, 55.125f, 55.375f, 56.2917f, 56.5417f, 56.7917f, 57.0417f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 25.f, 25.f, 25.f, 25.f, 25.f, 25.f, 25.f, 25.f, 25.f, 25.f, 25.f, 25.f, 50.f, 50.f, 50.f, 50.f, 50.f, 50.f, 50.f, 50.f, 50.f, 50.f, 50.f, 50.f, 7.39583f, 7.39583f, 7.42708f, 7.64583f, 9.0625f, 9.0625f, 9.09375f, 9.3125f, 10.7292f, 10.7292f, 10.7604f, 10.9792f, 32.3958f, 32.3958f, 32.4271f, 32.6458f, 34.0625f, 34.0625f, 34.0938f, 34.3125f, 35.7292f, 35.7292f, 35.7604f, 35.9792f, 57.3958f, 57.3958f, 57.4271f, 57.6458f, 59.0625f, 59.0625f, 59.0938f, 59.3125f, 60.7292f, 60.7292f, 60.7604f, 60.9792f, 4.27083f, 4.52083f, 4.77083f, 5.02083f, 5.9375f, 6.1875f, 6.4375f, 6.6875f, 7.60417f, 7.85417f, 8.10417f, 8.35417f, 29.2708f, 29.5208f, 29.7708f, 30.0208f, 30.9375f, 31.1875f, 31.4375f, 31.6875f, 32.6042f, 32.8542f, 33.1042f, 33.3542f, 54.2708f, 54.5208f, 54.7708f, 55.0208f, 55.9375f, 56.1875f, 56.4375f, 56.6875f, 57.6042f, 57.8542f, 58.1042f, 58.3542f, 6.77083f, 6.77083f, 6.77083f, 6.80208f, 8.4375f, 8.4375f, 8.4375f, 8.46875f, 10.1042f, 10.1042f, 10.1042f, 10.1354f, 31.7708f, 31.7708f, 31.7708f, 31.8021f, 33.4375f, 33.4375f, 33.4375f, 33.4688f, 35.1042f, 35.1042f, 35.1042f, 35.1354f, 56.7708f, 56.7708f, 56.7708f, 56.8021f, 58.4375f, 58.4375f, 58.4375f, 58.4688f, 60.1042f, 60.1042f, 60.1042f, 60.1354f}); - test.Run(OpTester::ExpectResult::kExpectFailure, "Invalid mode"); + // Exclude TRT EP due to Segmentation fault in A100 + test.Run(OpTester::ExpectResult::kExpectFailure, "Invalid mode", {kTensorrtExecutionProvider}); } TEST(RoiAlignTest, AvgModeNegativeSamplingRatio) { @@ -738,7 +740,8 @@ TEST(RoiAlignTest, AvgModeNegativeSamplingRatio) { test.AddInput("batch_indices", {5}, {0, 0, 0, 0, 0}); test.AddOutput("Y", {5, 3, 3, 4}, {2.95833f, 3.20833f, 3.45833f, 3.70833f, 4.625f, 4.875f, 5.125f, 5.375f, 6.29167f, 6.54167f, 6.79167f, 7.04167f, 27.9583f, 28.2083f, 28.4583f, 28.7083f, 29.625f, 29.875f, 30.125f, 30.375f, 31.2917f, 31.5417f, 31.7917f, 32.0417f, 52.9583f, 53.2083f, 53.4583f, 53.7083f, 54.625f, 54.875f, 55.125f, 55.375f, 56.2917f, 56.5417f, 56.7917f, 57.0417f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 25.f, 25.f, 25.f, 25.f, 25.f, 25.f, 25.f, 25.f, 25.f, 25.f, 25.f, 25.f, 50.f, 50.f, 50.f, 50.f, 50.f, 50.f, 50.f, 50.f, 50.f, 50.f, 50.f, 50.f, 7.39583f, 7.39583f, 7.42708f, 7.64583f, 9.0625f, 9.0625f, 9.09375f, 9.3125f, 10.7292f, 10.7292f, 10.7604f, 10.9792f, 32.3958f, 32.3958f, 32.4271f, 32.6458f, 34.0625f, 34.0625f, 34.0938f, 34.3125f, 35.7292f, 35.7292f, 35.7604f, 35.9792f, 57.3958f, 57.3958f, 57.4271f, 57.6458f, 59.0625f, 59.0625f, 59.0938f, 59.3125f, 60.7292f, 60.7292f, 60.7604f, 60.9792f, 4.27083f, 4.52083f, 4.77083f, 5.02083f, 5.9375f, 6.1875f, 6.4375f, 6.6875f, 7.60417f, 7.85417f, 8.10417f, 8.35417f, 29.2708f, 29.5208f, 29.7708f, 30.0208f, 30.9375f, 31.1875f, 31.4375f, 31.6875f, 32.6042f, 32.8542f, 33.1042f, 33.3542f, 54.2708f, 54.5208f, 54.7708f, 55.0208f, 55.9375f, 56.1875f, 56.4375f, 56.6875f, 57.6042f, 57.8542f, 58.1042f, 58.3542f, 6.77083f, 6.77083f, 6.77083f, 6.80208f, 8.4375f, 8.4375f, 8.4375f, 8.46875f, 10.1042f, 10.1042f, 10.1042f, 10.1354f, 31.7708f, 31.7708f, 31.7708f, 31.8021f, 33.4375f, 33.4375f, 33.4375f, 33.4688f, 35.1042f, 35.1042f, 35.1042f, 35.1354f, 56.7708f, 56.7708f, 56.7708f, 56.8021f, 58.4375f, 58.4375f, 58.4375f, 58.4688f, 60.1042f, 60.1042f, 60.1042f, 60.1354f}); - test.Run(OpTester::ExpectResult::kExpectFailure, "Sampling ratio should be >=0"); + // Exclude TRT EP due to Segmentation fault in A100 + test.Run(OpTester::ExpectResult::kExpectFailure, "Sampling ratio should be >=0", {kTensorrtExecutionProvider}); } TEST(RoiAlignTest, AvgModeNegativeInvalidNumRoiDims) { diff --git a/onnxruntime/test/providers/cpu/rnn/deep_cpu_lstm_op_test.cc b/onnxruntime/test/providers/cpu/rnn/deep_cpu_lstm_op_test.cc index 7e81fc80ddf85..ac7bd4ddb17d3 100644 --- a/onnxruntime/test/providers/cpu/rnn/deep_cpu_lstm_op_test.cc +++ b/onnxruntime/test/providers/cpu/rnn/deep_cpu_lstm_op_test.cc @@ -143,6 +143,8 @@ static void RunLstmTest(const std::vector& X_data, test.AddOptionalOutputEdge(); } + test.SetOutputTolerance(0.0001f, 0.0001f); + // TensorRT failed on LSTM tests test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); } diff --git a/onnxruntime/test/providers/cpu/tensor/affine_grid_test.cc b/onnxruntime/test/providers/cpu/tensor/affine_grid_test.cc index e37e784f28930..98f7eb6133822 100644 --- a/onnxruntime/test/providers/cpu/tensor/affine_grid_test.cc +++ b/onnxruntime/test/providers/cpu/tensor/affine_grid_test.cc @@ -13,6 +13,7 @@ TEST(AffineGridTest, 2d) { test.AddInput("size", {4}, {1, 1, 2, 3}); test.AddOutput("grid", {1, 2, 3, 2}, {-0.6667f, -0.5000f, 0.0000f, -0.5000f, 0.6667f, -0.5000f, -0.6667f, 0.5000f, 0.0000f, 0.5000f, 0.6667f, 0.5000f}); + test.SetOutputTolerance(0.0001f, 0.0001f); test.Run(); } @@ -24,6 +25,7 @@ TEST(AffineGridTest, test_2d_0) { test.AddInput("theta", {1, 2, 3}, {1.477212f, -0.173648f, 0.300000f, 0.173648f, 0.492404f, -0.500000f}); test.AddInput("size", {4}, {1, 1, 3, 2}); test.AddOutput("grid", {1, 3, 2, 2}, {-0.3228f, -0.9151f, 1.1544f, -0.7414f, -0.4386f, -0.5868f, 1.0386f, -0.4132f, -0.5544f, -0.2586f, 0.9228f, -0.0849f}); + test.SetOutputTolerance(0.0001f, 0.0001f); test.Run(); } @@ -33,6 +35,7 @@ TEST(AffineGridTest, test_2d_1) { test.AddInput("theta", {2, 2, 3}, {1.477212f, -0.173648f, 0.300000f, 0.173648f, 0.492404f, -0.500000f, 1.477212f, -0.173648f, 0.300000f, 0.173648f, 0.492404f, -0.500000f}); test.AddInput("size", {4}, {2, 10, 2, 3}); test.AddOutput("grid", {2, 2, 3, 2}, {-0.5980f, -0.8620f, 0.3868f, -0.7462f, 1.3716f, -0.6304f, -0.7716f, -0.3696f, 0.2132f, -0.2538f, 1.1980f, -0.1380f, -0.5980f, -0.8620f, 0.3868f, -0.7462f, 1.3716f, -0.6304f, -0.7716f, -0.3696f, 0.2132f, -0.2538f, 1.1980f, -0.1380f}); + test.SetOutputTolerance(0.0001f, 0.0001f); test.Run(); } @@ -42,6 +45,7 @@ TEST(AffineGridTest, test_2d_2) { test.AddInput("theta", {1, 2, 3}, {1.500000f, -0.866025f, -0.500000f, 0.866025f, 2.750000f, -0.500000f}); test.AddInput("size", {4}, {1, 1, 3, 2}); test.AddOutput("grid", {1, 3, 2, 2}, {-0.6726f, -2.7663f, 0.8274f, -1.9003f, -1.2500f, -0.9330f, 0.2500f, -0.0670f, -1.8274f, 0.9003f, -0.3274f, 1.7663f}); + test.SetOutputTolerance(0.0001f, 0.0001f); test.Run(); } @@ -51,6 +55,7 @@ TEST(AffineGridTest, test_2d_3) { test.AddInput("theta", {2, 2, 3}, {1.500000f, -0.866025f, -0.500000f, 0.866025f, 2.750000f, -0.500000f, 1.500000f, -0.866025f, -0.500000f, 0.866025f, 2.750000f, -0.500000f}); test.AddInput("size", {4}, {2, 10, 2, 3}); test.AddOutput("grid", {2, 2, 3, 2}, {-1.0670f, -2.4524f, -0.0670f, -1.8750f, 0.9330f, -1.2976f, -1.9330f, 0.2976f, -0.9330f, 0.8750f, 0.0670f, 1.4524f, -1.0670f, -2.4524f, -0.0670f, -1.8750f, 0.9330f, -1.2976f, -1.9330f, 0.2976f, -0.9330f, 0.8750f, 0.0670f, 1.4524f}); + test.SetOutputTolerance(0.0001f, 0.0001f); test.Run(); } @@ -60,6 +65,7 @@ TEST(AffineGridTest, test_2d_4) { test.AddInput("theta", {1, 2, 3}, {1.477212f, -0.173648f, 0.300000f, 0.173648f, 0.492404f, -0.500000f}); test.AddInput("size", {4}, {1, 1, 3, 2}); test.AddOutput("grid", {1, 3, 2, 2}, {-1.0036f, -1.1661f, 1.9509f, -0.8188f, -1.1772f, -0.6736f, 1.7772f, -0.3264f, -1.3509f, -0.1812f, 1.6036f, 0.1661f}); + test.SetOutputTolerance(0.0001f, 0.0001f); test.Run(); } @@ -69,6 +75,7 @@ TEST(AffineGridTest, test_2d_5) { test.AddInput("theta", {2, 2, 3}, {1.477212f, -0.173648f, 0.300000f, 0.173648f, 0.492404f, -0.500000f, 1.477212f, -0.173648f, 0.300000f, 0.173648f, 0.492404f, -0.500000f}); test.AddInput("size", {4}, {2, 10, 2, 3}); test.AddOutput("grid", {2, 2, 3, 2}, {-1.0036f, -1.1661f, 0.4736f, -0.9924f, 1.9509f, -0.8188f, -1.3509f, -0.1812f, 0.1264f, -0.0076f, 1.6036f, 0.1661f, -1.0036f, -1.1661f, 0.4736f, -0.9924f, 1.9509f, -0.8188f, -1.3509f, -0.1812f, 0.1264f, -0.0076f, 1.6036f, 0.1661f}); + test.SetOutputTolerance(0.0001f, 0.0001f); test.Run(); } @@ -78,6 +85,7 @@ TEST(AffineGridTest, test_2d_6) { test.AddInput("theta", {1, 2, 3}, {1.500000f, -0.866025f, -0.500000f, 0.866025f, 2.750000f, -0.500000f}); test.AddInput("size", {4}, {1, 1, 3, 2}); test.AddOutput("grid", {1, 3, 2, 2}, {-1.1340f, -4.1160f, 1.8660f, -2.3840f, -2.0000f, -1.3660f, 1.0000f, 0.3660f, -2.8660f, 1.3840f, 0.1340f, 3.1160f}); + test.SetOutputTolerance(0.0001f, 0.0001f); test.Run(); } @@ -87,6 +95,7 @@ TEST(AffineGridTest, test_2d_7) { test.AddInput("theta", {2, 2, 3}, {1.500000f, -0.866025f, -0.500000f, 0.866025f, 2.750000f, -0.500000f, 1.500000f, -0.866025f, -0.500000f, 0.866025f, 2.750000f, -0.500000f}); test.AddInput("size", {4}, {2, 10, 2, 3}); test.AddOutput("grid", {2, 2, 3, 2}, {-1.1340f, -4.1160f, 0.3660f, -3.2500f, 1.8660f, -2.3840f, -2.8660f, 1.3840f, -1.3660f, 2.2500f, 0.1340f, 3.1160f, -1.1340f, -4.1160f, 0.3660f, -3.2500f, 1.8660f, -2.3840f, -2.8660f, 1.3840f, -1.3660f, 2.2500f, 0.1340f, 3.1160f}); + test.SetOutputTolerance(0.0001f, 0.0001f); test.Run(); } @@ -96,6 +105,7 @@ TEST(AffineGridTest, test_3d_0) { test.AddInput("theta", {1, 3, 4}, {1.409539f, 0.000000f, 0.513030f, 0.300000f, 0.118782f, 1.969615f, -0.326352f, -0.500000f, -0.168412f, 0.086824f, 0.462708f, 1.800000f}); test.AddInput("size", {5}, {1, 1, 3, 2, 2}); test.AddOutput("grid", {1, 3, 2, 2, 3}, {-0.7468f, -1.3266f, 1.5323f, 0.6627f, -1.2078f, 1.3639f, -0.7468f, 0.6430f, 1.6191f, 0.6627f, 0.7618f, 1.4507f, -0.4048f, -1.5442f, 1.8408f, 1.0048f, -1.4254f, 1.6724f, -0.4048f, 0.4254f, 1.9276f, 1.0048f, 0.5442f, 1.7592f, -0.0627f, -1.7618f, 2.1493f, 1.3468f, -1.6430f, 1.9809f, -0.0627f, 0.2078f, 2.2361f, 1.3468f, 0.3266f, 2.0677f}); + test.SetOutputTolerance(0.0001f, 0.0001f); test.Run(); } @@ -105,6 +115,7 @@ TEST(AffineGridTest, test_3d_1) { test.AddInput("theta", {2, 3, 4}, {1.409539f, 0.000000f, 0.513030f, 0.300000f, 0.118782f, 1.969615f, -0.326352f, -0.500000f, -0.168412f, 0.086824f, 0.462708f, 1.800000f, 1.409539f, 0.000000f, 0.513030f, 0.300000f, 0.118782f, 1.969615f, -0.326352f, -0.500000f, -0.168412f, 0.086824f, 0.462708f, 1.800000f}); test.AddInput("size", {5}, {2, 10, 2, 2, 3}); test.AddOutput("grid", {2, 2, 2, 3, 3}, {-0.8962f, -1.4008f, 1.6375f, 0.0435f, -1.3216f, 1.5252f, 0.9832f, -1.2424f, 1.4130f, -0.8962f, 0.5688f, 1.7243f, 0.0435f, 0.6480f, 1.6121f, 0.9832f, 0.7272f, 1.4998f, -0.3832f, -1.7272f, 2.1002f, 0.5565f, -1.6480f, 1.9879f, 1.4962f, -1.5688f, 1.8757f, -0.3832f, 0.2424f, 2.1870f, 0.5565f, 0.3216f, 2.0748f, 1.4962f, 0.4008f, 1.9625f, -0.8962f, -1.4008f, 1.6375f, 0.0435f, -1.3216f, 1.5252f, 0.9832f, -1.2424f, 1.4130f, -0.8962f, 0.5688f, 1.7243f, 0.0435f, 0.6480f, 1.6121f, 0.9832f, 0.7272f, 1.4998f, -0.3832f, -1.7272f, 2.1002f, 0.5565f, -1.6480f, 1.9879f, 1.4962f, -1.5688f, 1.8757f, -0.3832f, 0.2424f, 2.1870f, 0.5565f, 0.3216f, 2.0748f, 1.4962f, 0.4008f, 1.9625f}); + test.SetOutputTolerance(0.0001f, 0.0001f); test.Run(); } @@ -114,6 +125,7 @@ TEST(AffineGridTest, test_3d_2) { test.AddInput("theta", {1, 3, 4}, {0.259808f, 0.000000f, -0.150000f, -0.500000f, -1.299038f, 1.500000f, -2.250000f, -0.500000f, 1.375000f, 4.763140f, 2.381570f, 0.300000f}); test.AddInput("size", {5}, {1, 1, 3, 2, 2}); test.AddOutput("grid", {1, 3, 2, 2, 3}, {-0.5299f, 0.8995f, -4.3568f, -0.2701f, -0.3995f, -2.9818f, -0.5299f, 2.3995f, 0.4064f, -0.2701f, 1.1005f, 1.7814f, -0.6299f, -0.6005f, -2.7691f, -0.3701f, -1.8995f, -1.3941f, -0.6299f, 0.8995f, 1.9941f, -0.3701f, -0.3995f, 3.3691f, -0.7299f, -2.1005f, -1.1814f, -0.4701f, -3.3995f, 0.1936f, -0.7299f, -0.6005f, 3.5818f, -0.4701f, -1.8995f, 4.9568f}); + test.SetOutputTolerance(0.0001f, 0.0001f); test.Run(); } @@ -123,6 +135,7 @@ TEST(AffineGridTest, test_3d_3) { test.AddInput("theta", {2, 3, 4}, {0.259808f, 0.000000f, -0.150000f, -0.500000f, -1.299038f, 1.500000f, -2.250000f, -0.500000f, 1.375000f, 4.763140f, 2.381570f, 0.300000f, 0.259808f, 0.000000f, -0.150000f, -0.500000f, -1.299038f, 1.500000f, -2.250000f, -0.500000f, 1.375000f, 4.763140f, 2.381570f, 0.300000f}); test.AddInput("size", {5}, {2, 10, 2, 2, 3}); test.AddOutput("grid", {2, 2, 2, 3, 3}, {-0.5982f, 0.7410f, -4.1890f, -0.4250f, -0.1250f, -3.2724f, -0.2518f, -0.9910f, -2.3557f, -0.5982f, 2.2410f, 0.5741f, -0.4250f, 1.3750f, 1.4908f, -0.2518f, 0.5090f, 2.4075f, -0.7482f, -1.5090f, -1.8075f, -0.5750f, -2.3750f, -0.8908f, -0.4018f, -3.2410f, 0.0259f, -0.7482f, -0.0090f, 2.9557f, -0.5750f, -0.8750f, 3.8724f, -0.4018f, -1.7410f, 4.7890f, -0.5982f, 0.7410f, -4.1890f, -0.4250f, -0.1250f, -3.2724f, -0.2518f, -0.9910f, -2.3557f, -0.5982f, 2.2410f, 0.5741f, -0.4250f, 1.3750f, 1.4908f, -0.2518f, 0.5090f, 2.4075f, -0.7482f, -1.5090f, -1.8075f, -0.5750f, -2.3750f, -0.8908f, -0.4018f, -3.2410f, 0.0259f, -0.7482f, -0.0090f, 2.9557f, -0.5750f, -0.8750f, 3.8724f, -0.4018f, -1.7410f, 4.7890f}); + test.SetOutputTolerance(0.0001f, 0.0001f); test.Run(); } @@ -132,6 +145,7 @@ TEST(AffineGridTest, test_3d_4) { test.AddInput("theta", {1, 3, 4}, {1.409539f, 0.000000f, 0.513030f, 0.300000f, 0.118782f, 1.969615f, -0.326352f, -0.500000f, -0.168412f, 0.086824f, 0.462708f, 1.800000f}); test.AddInput("size", {5}, {1, 1, 3, 2, 2}); test.AddOutput("grid", {1, 3, 2, 2, 3}, {-1.6226f, -2.2620f, 1.4189f, 1.1965f, -2.0245f, 1.0821f, -1.6226f, 1.6772f, 1.5925f, 1.1965f, 1.9147f, 1.2557f, -1.1095f, -2.5884f, 1.8816f, 1.7095f, -2.3508f, 1.5448f, -1.1095f, 1.3508f, 2.0552f, 1.7095f, 1.5884f, 1.7184f, -0.5965f, -2.9147f, 2.3443f, 2.2226f, -2.6772f, 2.0075f, -0.5965f, 1.0245f, 2.5179f, 2.2226f, 1.2620f, 2.1811f}); + test.SetOutputTolerance(0.0001f, 0.0001f); test.Run(); } @@ -141,6 +155,7 @@ TEST(AffineGridTest, test_3d_5) { test.AddInput("theta", {2, 3, 4}, {1.409539f, 0.000000f, 0.513030f, 0.300000f, 0.118782f, 1.969615f, -0.326352f, -0.500000f, -0.168412f, 0.086824f, 0.462708f, 1.800000f, 1.409539f, 0.000000f, 0.513030f, 0.300000f, 0.118782f, 1.969615f, -0.326352f, -0.500000f, -0.168412f, 0.086824f, 0.462708f, 1.800000f}); test.AddInput("size", {5}, {2, 10, 2, 2, 3}); test.AddOutput("grid", {2, 2, 2, 3, 3}, {-1.6226f, -2.2620f, 1.4189f, -0.2130f, -2.1433f, 1.2505f, 1.1965f, -2.0245f, 1.0821f, -1.6226f, 1.6772f, 1.5925f, -0.2130f, 1.7960f, 1.4241f, 1.1965f, 1.9147f, 1.2557f, -0.5965f, -2.9147f, 2.3443f, 0.8130f, -2.7960f, 2.1759f, 2.2226f, -2.6772f, 2.0075f, -0.5965f, 1.0245f, 2.5179f, 0.8130f, 1.1433f, 2.3495f, 2.2226f, 1.2620f, 2.1811f, -1.6226f, -2.2620f, 1.4189f, -0.2130f, -2.1433f, 1.2505f, 1.1965f, -2.0245f, 1.0821f, -1.6226f, 1.6772f, 1.5925f, -0.2130f, 1.7960f, 1.4241f, 1.1965f, 1.9147f, 1.2557f, -0.5965f, -2.9147f, 2.3443f, 0.8130f, -2.7960f, 2.1759f, 2.2226f, -2.6772f, 2.0075f, -0.5965f, 1.0245f, 2.5179f, 0.8130f, 1.1433f, 2.3495f, 2.2226f, 1.2620f, 2.1811f}); + test.SetOutputTolerance(0.0001f, 0.0001f); test.Run(); } @@ -150,6 +165,7 @@ TEST(AffineGridTest, test_3d_6) { test.AddInput("theta", {1, 3, 4}, {0.259808f, 0.000000f, -0.150000f, -0.500000f, -1.299038f, 1.500000f, -2.250000f, -0.500000f, 1.375000f, 4.763140f, 2.381570f, 0.300000f}); test.AddInput("size", {5}, {1, 1, 3, 2, 2}); test.AddOutput("grid", {1, 3, 2, 2, 3}, {-0.6098f, 1.5490f, -8.2197f, -0.0902f, -1.0490f, -5.4697f, -0.6098f, 4.5490f, 1.3066f, -0.0902f, 1.9510f, 4.0566f, -0.7598f, -0.7010f, -5.8381f, -0.2402f, -3.2990f, -3.0881f, -0.7598f, 2.2990f, 3.6881f, -0.2402f, -0.2990f, 6.4381f, -0.9098f, -2.9510f, -3.4566f, -0.3902f, -5.5490f, -0.7066f, -0.9098f, 0.0490f, 6.0697f, -0.3902f, -2.5490f, 8.8197f}); + test.SetOutputTolerance(0.0001f, 0.0001f); test.Run(); } @@ -159,6 +175,7 @@ TEST(AffineGridTest, test_3d_7) { test.AddInput("theta", {2, 3, 4}, {0.259808f, 0.000000f, -0.150000f, -0.500000f, -1.299038f, 1.500000f, -2.250000f, -0.500000f, 1.375000f, 4.763140f, 2.381570f, 0.300000f, 0.259808f, 0.000000f, -0.150000f, -0.500000f, -1.299038f, 1.500000f, -2.250000f, -0.500000f, 1.375000f, 4.763140f, 2.381570f, 0.300000f}); test.AddInput("size", {5}, {2, 10, 2, 2, 3}); test.AddOutput("grid", {2, 2, 2, 3, 3}, {-0.6098f, 1.5490f, -8.2197f, -0.3500f, 0.2500f, -6.8447f, -0.0902f, -1.0490f, -5.4697f, -0.6098f, 4.5490f, 1.3066f, -0.3500f, 3.2500f, 2.6816f, -0.0902f, 1.9510f, 4.0566f, -0.9098f, -2.9510f, -3.4566f, -0.6500f, -4.2500f, -2.0816f, -0.3902f, -5.5490f, -0.7066f, -0.9098f, 0.0490f, 6.0697f, -0.6500f, -1.2500f, 7.4447f, -0.3902f, -2.5490f, 8.8197f, -0.6098f, 1.5490f, -8.2197f, -0.3500f, 0.2500f, -6.8447f, -0.0902f, -1.0490f, -5.4697f, -0.6098f, 4.5490f, 1.3066f, -0.3500f, 3.2500f, 2.6816f, -0.0902f, 1.9510f, 4.0566f, -0.9098f, -2.9510f, -3.4566f, -0.6500f, -4.2500f, -2.0816f, -0.3902f, -5.5490f, -0.7066f, -0.9098f, 0.0490f, 6.0697f, -0.6500f, -1.2500f, 7.4447f, -0.3902f, -2.5490f, 8.8197f}); + test.SetOutputTolerance(0.0001f, 0.0001f); test.Run(); } } // namespace test diff --git a/onnxruntime/test/providers/cpu/tensor/mean_variance_normalization_test.cc b/onnxruntime/test/providers/cpu/tensor/mean_variance_normalization_test.cc index b6720ae2a9a7d..be212515376fb 100644 --- a/onnxruntime/test/providers/cpu/tensor/mean_variance_normalization_test.cc +++ b/onnxruntime/test/providers/cpu/tensor/mean_variance_normalization_test.cc @@ -5,6 +5,7 @@ #include "test/common/tensor_op_test_utils.h" #include "test/providers/provider_test_utils.h" +#include "test/util/include/default_providers.h" namespace onnxruntime::test { @@ -155,6 +156,10 @@ TEST(MeanVarianceNormalizationTest, AxesSubsets5D) { test.AddInput("input", shape, X.data(), X.size()); test.AddOutput("output", shape, Y.data(), Y.size()); + if (DefaultDmlExecutionProvider().get() != nullptr) { + test.SetOutputTolerance(0.001f, 0.001f); + } + test.Run(); }; diff --git a/onnxruntime/test/providers/cpu/tensor/onehot_op_test.cc b/onnxruntime/test/providers/cpu/tensor/onehot_op_test.cc index a2ffbdcc0bdf1..452094b484d4a 100644 --- a/onnxruntime/test/providers/cpu/tensor/onehot_op_test.cc +++ b/onnxruntime/test/providers/cpu/tensor/onehot_op_test.cc @@ -9,6 +9,11 @@ using namespace std; namespace onnxruntime { namespace test { +// Exclude TRT EP in tests due to Segmentation fault in A100 +static void run_excluding_trt(OpTester& test) { + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + TEST(OneHotOpTest, DefaultAxis) { OpTester test("OneHot", 11); test.AddInput("indices", {2, 3}, {1, 9, 8, 2, 4, 6}); @@ -36,7 +41,7 @@ TEST(OneHotOpTest, DefaultAxis_float_float_float /*indices, output, depth*/) { 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.}); - test.Run(); + run_excluding_trt(test); } TEST(OneHotOpTest, DefaultAxis_int64_int32_float /*indices, output, depth*/) { @@ -51,7 +56,7 @@ TEST(OneHotOpTest, DefaultAxis_int64_int32_float /*indices, output, depth*/) { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0}); - test.Run(); + run_excluding_trt(test); } TEST(OneHotOpTest, DefaultAxis_int64_float_int64 /*indices, output, depth*/) { @@ -81,7 +86,7 @@ TEST(OneHotOpTest, DefaultAxis_int32_float_float /*indices, output, depth*/) { 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f}); - test.Run(); + run_excluding_trt(test); } TEST(OneHotOpTest, DefaultAxis_int32_float_int32 /*indices, output, depth*/) { @@ -231,7 +236,7 @@ TEST(OneHotOpTest, DefaultAxis_float_float_float_NonZeroOffValue /*indices, outp 2., 2., 3., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 3., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 3., 2., 2., 2.}); - test.Run(); + run_excluding_trt(test); } TEST(OneHotOpTest, DefaultAxis_int64_int32_float_NonZeroOffValue /*indices, output, depth*/) { @@ -246,7 +251,7 @@ TEST(OneHotOpTest, DefaultAxis_int64_int32_float_NonZeroOffValue /*indices, outp 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2}); - test.Run(); + run_excluding_trt(test); } TEST(OneHotOpTest, DefaultAxis_int64_float_int64_NonZeroOffValue /*indices, output, depth*/) { @@ -276,7 +281,7 @@ TEST(OneHotOpTest, DefaultAxis_int32_float_float_NonZeroOffValue /*indices, outp 2.0f, 2.0f, 3.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 3.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 3.0f, 2.0f, 2.0f, 2.0f}); - test.Run(); + run_excluding_trt(test); } TEST(OneHotOpTest, DefaultAxis_int32_float_int32_NonZeroOffValue /*indices, output, depth*/) { diff --git a/onnxruntime/test/providers/cpu/tensor/resize_op_test.cc b/onnxruntime/test/providers/cpu/tensor/resize_op_test.cc index 062f25b989a70..f8467fb50ed2d 100644 --- a/onnxruntime/test/providers/cpu/tensor/resize_op_test.cc +++ b/onnxruntime/test/providers/cpu/tensor/resize_op_test.cc @@ -8,6 +8,12 @@ namespace onnxruntime { namespace test { + +// Exclude TRT EP in tests due to Segmentation fault in A100 +static void run_excluding_trt(OpTester& test) { + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + TEST(ResizeOpTest, ResizeOpLinearDownSampleTest_tf_crop_and_resize) { // TODO: Unskip when fixed #41968513 if (DefaultDmlExecutionProvider().get() != nullptr) { @@ -243,7 +249,9 @@ TEST(ResizeOpTest, ResizeOpLinearDownSampleTest_4DBilinear) { std::vector Y = {2.66666651f, 4.3333331f}; test.AddOutput("Y", {N, C, static_cast(H * scales[2]), static_cast(W * scales[3])}, Y); - test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kQnnExecutionProvider}); // QNN: result diff + // QNN: result diff + // TRT: Segmentation fault in A100 + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kQnnExecutionProvider, kTensorrtExecutionProvider}); } TEST(ResizeOpTest, NhwcResizeOpLinearDownSampleTest_4DBilinear) { @@ -267,8 +275,9 @@ TEST(ResizeOpTest, NhwcResizeOpLinearDownSampleTest_4DBilinear) { test.AddOutput("Y", {N, static_cast(H * scales[1]), static_cast(W * scales[2]), C}, Y); // CUDA: result mismatch due to not implementing NHWC support // ROCm: results mismatch + // TRT: Segmentation fault in A100 test.Run(OpTester::ExpectResult::kExpectSuccess, "", - {kCudaExecutionProvider, kCudaNHWCExecutionProvider, kRocmExecutionProvider}); + {kCudaExecutionProvider, kCudaNHWCExecutionProvider, kRocmExecutionProvider, kTensorrtExecutionProvider}); } TEST(ResizeOpTest, NhwcResizeOpLinearDownSampleTest_4DBilinear_uint8) { @@ -315,7 +324,7 @@ TEST(ResizeOpTest, NhwcResizeOpLinearDownSampleTest_4DBilinear_int8) { std::vector Y = {0, 0}; test.AddOutput("Y", {N, static_cast(H * scales[1]), static_cast(W * scales[2]), C}, Y); - test.Run(); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); } // Since NNAPI(TFLite) only using the scale calculate using the input/output size @@ -347,7 +356,7 @@ TEST(ResizeOpTest, ResizeOpLinearDownSampleTest_4DBilinear1) { std::vector Y = {3.5f, 5.5f}; test.AddOutput("Y", {N, C, static_cast(H * scales[2]), static_cast(W * scales[3])}, Y); - test.Run(); + run_excluding_trt(test); }; run_test(false); @@ -405,7 +414,7 @@ TEST(ResizeOpTest, ResizeOpLinearDownSampleTest_4DBilinear_align_corners) { std::vector Y = {1.0f, 4.0f}; test.AddOutput("Y", {N, C, static_cast(H * scales[2]), static_cast(W * scales[3])}, Y); - test.Run(); + run_excluding_trt(test); }; run_test(false); @@ -608,7 +617,7 @@ TEST(ResizeOpTest, ResizeOpLinearUpSampleTest_4DBilinear_asymmetric_scales) { 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 11.0f, 11.0f, 11.0f}; test.AddOutput("Y", {N, C, static_cast(H * scales[2]), static_cast(W * scales[3])}, Y); - test.Run(); + run_excluding_trt(test); }; run_test(false); @@ -725,7 +734,7 @@ TEST(ResizeOpTest, ResizeOpLinearUpSampleTest_2DBilinear_align_corners) { 4.0f, 4.5714290f, 5.142857f, 5.714286f, 6.285714f, 6.8571430f, 7.428571f, 8.0f}; test.AddOutput("Y", {static_cast(H * scales[0]), static_cast(W * scales[1])}, Y); - test.Run(); + run_excluding_trt(test); } TEST(ResizeOpTest, ResizeOpLinearDownSampleTest_3DTrilinear_pytorch_half_pixel) { @@ -819,7 +828,7 @@ TEST(ResizeOpTest, ResizeOpLinearScalesNoOpTest) { 7.0f, 11.0f}; test.AddOutput("Y", {N, C, H, W}, Y); - test.Run(); + run_excluding_trt(test); }; run_test(false); @@ -845,7 +854,7 @@ TEST(ResizeOpTest, ResizeOpNearestDownSampleTest) { std::vector Y = {1.0f, 3.0f}; test.AddOutput("Y", {N, C, static_cast(H * scales[2]), static_cast(W * scales[3])}, Y); - test.Run(); + run_excluding_trt(test); } TEST(ResizeOpTest, ResizeOpNearestDownSampleTest_Opset12) { @@ -867,7 +876,7 @@ TEST(ResizeOpTest, ResizeOpNearestDownSampleTest_Opset12) { std::vector Y = {1.0f, 3.0f}; test.AddOutput("Y", {N, C, static_cast(H * scales[2]), static_cast(W * scales[3])}, Y); - test.Run(); + run_excluding_trt(test); } TEST(ResizeOpTest, ResizeOpNearestDownSampleTest_WithSizes) { @@ -920,7 +929,7 @@ TEST(ResizeOpTest, ResizeOpNearestDownSampleTest_tf_half_pixel) { 14.0f, 16.0f}; test.AddOutput("Y", {N, C, sizes[2], sizes[3]}, Y); - test.Run(); + run_excluding_trt(test); } TEST(ResizeOpTest, ResizeOpNearestDownSampleTest_tf_crop_and_resize_with_extrapolation) { @@ -1000,7 +1009,7 @@ TEST(ResizeOpTest, ResizeOpNearestUpSampleTest) { 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f}; test.AddOutput("Y", {N, C, static_cast(H * scales[2]), static_cast(W * scales[3])}, Y); - test.Run(); + run_excluding_trt(test); } TEST(ResizeOpTest, ResizeOpNearestUpSampleTest_WithSizes_CeilMode) { @@ -1093,7 +1102,7 @@ TEST(ResizeOpTest, ResizeOpNearestUpSample_Floor_Align_Corners) { 13.0f, 13.0f, 13.0f, 14.0f, 14.0f, 15.0f, 15.0f, 16.0f}; test.AddOutput("Y", {N, C, static_cast(H * scales[2]), static_cast(W * scales[3])}, Y); - test.Run(); + run_excluding_trt(test); } TEST(ResizeOpTest, ResizeOpNearest_OneToOneMappingBetweenInputAndOutputDataDims) { @@ -1197,7 +1206,7 @@ TEST(ResizeOpTest, ResizeOpNearestUpSample_Nearest2xOptimization_Scales) { 3.0f, 3.0f, 4.0f, 4.0f}; test.AddOutput("Y", {N, C, static_cast(H * scales[2]), static_cast(W * scales[3])}, Y); - test.Run(); + run_excluding_trt(test); }; run_test(false); @@ -1262,7 +1271,7 @@ TEST(ResizeOpTest, ResizeOpCubicDownSampleTest) { 11.9165f, 13.2266f, 14.5278f}; test.AddOutput("Y", {N, C, static_cast(H * scales[2]), static_cast(W * scales[3])}, Y); - test.Run(); + run_excluding_trt(test); } TEST(ResizeOpTest, ResizeOpCubicDownSampleTest_exclude_outside) { @@ -1292,7 +1301,7 @@ TEST(ResizeOpTest, ResizeOpCubicDownSampleTest_exclude_outside) { 11.949f, 13.2503f, 14.5942f}; test.AddOutput("Y", {static_cast(H * scales[0]), static_cast(W * scales[1])}, Y); - test.Run(); + run_excluding_trt(test); } TEST(ResizeOpTest, ResizeOpCubicDownSampleTest_coeff) { @@ -1319,7 +1328,7 @@ TEST(ResizeOpTest, ResizeOpCubicDownSampleTest_coeff) { 11.8701f, 13.168f, 14.4912f}; test.AddOutput("Y", {N, C, static_cast(H * scales[2]), static_cast(W * scales[3])}, Y); - test.Run(); + run_excluding_trt(test); } TEST(ResizeOpTest, ResizeOpCubicDownSampleTest_with_roi) { @@ -1373,7 +1382,7 @@ TEST(ResizeOpTest, ResizeOpCubicDownSampleTest_asymmetric) { 11.375f, 12.6719f, 13.9688f}; test.AddOutput("Y", {N, C, static_cast(H * scales[2]), static_cast(W * scales[3])}, Y); - test.Run(); + run_excluding_trt(test); } TEST(ResizeOpTest, ResizeOpCubicUpSampleTest) { @@ -1405,7 +1414,7 @@ TEST(ResizeOpTest, ResizeOpCubicUpSampleTest) { 13.375f, 13.7813f, 14.375f, 14.875f, 15.375f, 15.9688f, 16.375f, 16.4688f}; test.AddOutput("Y", {N, C, static_cast(H * scales[2]), static_cast(W * scales[3])}, Y); - test.Run(); + run_excluding_trt(test); } TEST(ResizeOpTest, ResizeOpCubicUpSampleTest_MultiChannel) { @@ -1486,7 +1495,7 @@ TEST(ResizeOpTest, ResizeOpCubicUpSampleTest_tf_half_pixel_for_nn) { 13.332f, 13.8086f, 14.4375f, 14.8438f, 15.4727f, 15.9492f, 16.2461f, 16.1758f}; test.AddOutput("Y", {N, C, static_cast(H * scales[2]), static_cast(W * scales[3])}, Y); - test.Run(); + run_excluding_trt(test); } TEST(ResizeOpTest, ResizeOpLinearDownSampleTest_4DBilinear_Ver10) { @@ -1512,7 +1521,9 @@ TEST(ResizeOpTest, ResizeOpLinearDownSampleTest_4DBilinear_Ver10) { std::vector Y = {1.0f, 2.66666651f}; test.AddOutput("Y", {N, C, static_cast(H * scales[2]), static_cast(W * scales[3])}, Y); - test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kQnnExecutionProvider}); // QNN: result diff + // QNN: result diff + // TRT: segmentation fault in A100 + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kQnnExecutionProvider, kTensorrtExecutionProvider}); } TEST(ResizeOpTest, ResizeOpLinearDownSampleTest_2DBilinear_Ver10) { @@ -1538,7 +1549,7 @@ TEST(ResizeOpTest, ResizeOpLinearDownSampleTest_2DBilinear_Ver10) { std::vector Y = {1.0f, 2.66666651f}; test.AddOutput("Y", {static_cast(H * scales[0]), static_cast(W * scales[1])}, Y); - test.Run(); + run_excluding_trt(test); } TEST(ResizeOpTest, ResizeOpLinearUpSampleTest_4DBilinear_Ver10) { @@ -1574,7 +1585,9 @@ TEST(ResizeOpTest, ResizeOpLinearUpSampleTest_4DBilinear_Ver10) { 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 11.0f, 11.0f, 11.0f}; test.AddOutput("Y", {N, C, static_cast(H * scales[2]), static_cast(W * scales[3])}, Y); - test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kQnnExecutionProvider}); // QNN: result diff + // QNN: result diff + // TRT: segmentation fault in A100 + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kQnnExecutionProvider, kTensorrtExecutionProvider}); } TEST(ResizeOpTest, ResizeOpLinearUpSampleTest_2DBilinear_Ver10) { @@ -1602,7 +1615,7 @@ TEST(ResizeOpTest, ResizeOpLinearUpSampleTest_2DBilinear_Ver10) { 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 8.0f, 8.0f, 8.0f}; test.AddOutput("Y", {static_cast(H * scales[0]), static_cast(W * scales[1])}, Y); - test.Run(); + run_excluding_trt(test); } TEST(ResizeOpTest, ResizeOpLinearScalesNoOpTest_Ver10) { @@ -1627,7 +1640,7 @@ TEST(ResizeOpTest, ResizeOpLinearScalesNoOpTest_Ver10) { 7.0f, 11.0f}; test.AddOutput("Y", {N, C, H, W}, Y); - test.Run(); + run_excluding_trt(test); } TEST(ResizeOpTest, ResizeOpNearestDownSampleTest_Ver10) { @@ -1647,7 +1660,7 @@ TEST(ResizeOpTest, ResizeOpNearestDownSampleTest_Ver10) { std::vector Y = {1.0f, 3.0f}; test.AddOutput("Y", {N, C, static_cast(H * scales[2]), static_cast(W * scales[3])}, Y); - test.Run(); + run_excluding_trt(test); } TEST(ResizeOpTest, ResizeOpNearestUpSampleTest_Ver10) { @@ -1668,10 +1681,10 @@ TEST(ResizeOpTest, ResizeOpNearestUpSampleTest_Ver10) { 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f}; test.AddOutput("Y", {N, C, static_cast(H * scales[2]), static_cast(W * scales[3])}, Y); - test.Run(); + run_excluding_trt(test); } -TEST(UpsampleOpTest, ResizeOpNearestNoScaleTest_Ver10) { +TEST(ResizeOpTest, ResizeOpNearestNoScaleTest_Ver10) { OpTester test("Resize", 10); std::vector scales{1.0f, 1.0f, 1.0f, 1.0f}; @@ -1686,7 +1699,7 @@ TEST(UpsampleOpTest, ResizeOpNearestNoScaleTest_Ver10) { std::vector Y = {1.0f, 2.0f, 3.0f, 4.0f}; test.AddOutput("Y", {N, C, H, W}, Y); - test.Run(); + run_excluding_trt(test); } TEST(ResizeOpTest, ResizeOp_MissingRoiAndMissingScalesOptionalInputs) { @@ -1737,7 +1750,7 @@ void ResizeOpTypeCheck_Ver_10() { 3, 3, 3, 4, 4, 4}; test.AddOutput("Y", {N, C, static_cast(H * scales[2]), static_cast(W * scales[3])}, Y); - test.Run(); + run_excluding_trt(test); } TEST(ResizeOpTest, ResizeOpTypeCheck_Ver_10) { @@ -1768,7 +1781,7 @@ void ResizeOpTypeCheck_Ver_11_13_18(int opset_version) { 3, 3, 3, 4, 4, 4}; test.AddOutput("Y", {N, C, static_cast(H * scales[2]), static_cast(W * scales[3])}, Y); - test.Run(); + run_excluding_trt(test); } TEST(ResizeOpTest, ResizeOpTypeCheck_Ver11) { diff --git a/onnxruntime/test/providers/cpu/tensor/upsample_op_test.cc b/onnxruntime/test/providers/cpu/tensor/upsample_op_test.cc index 188532cfa350a..825bdc8c8103d 100644 --- a/onnxruntime/test/providers/cpu/tensor/upsample_op_test.cc +++ b/onnxruntime/test/providers/cpu/tensor/upsample_op_test.cc @@ -939,7 +939,9 @@ TEST(UpsampleOpTest, UpsampleOpNearest2XTest_opset9) { 7, 7, 9, 9}; test.AddOutput("Y", {N, C, (int64_t)(H * scales[2]), (int64_t)(W * scales[3])}, Y); - test.Run(); + + // TRT: segmentation fault in A100 + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); } TEST(UpsampleOpTest, NhwcUpsampleOpNearest2XTest_opset9) { diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc index c12a52c4356aa..6ad2d41edb562 100644 --- a/onnxruntime/test/util/default_providers.cc +++ b/onnxruntime/test/util/default_providers.cc @@ -8,7 +8,7 @@ #ifdef USE_COREML #include "core/providers/coreml/coreml_provider_factory.h" #endif -#if defined(ENABLE_CUDA_NHWC_OPS) +#ifdef USE_CUDA #include #endif #include "core/session/onnxruntime_cxx_api.h" @@ -113,8 +113,9 @@ std::unique_ptr DefaultOpenVINOExecutionProvider() { std::unique_ptr DefaultCudaExecutionProvider() { #ifdef USE_CUDA - OrtCUDAProviderOptions provider_options{}; + OrtCUDAProviderOptionsV2 provider_options{}; provider_options.do_copy_in_default_stream = true; + provider_options.use_tf32 = false; if (auto factory = CudaProviderFactoryCreator::Create(&provider_options)) return factory->CreateProvider(); #endif @@ -126,6 +127,7 @@ std::unique_ptr DefaultCudaNHWCExecutionProvider() { #if defined(USE_CUDA) OrtCUDAProviderOptionsV2 provider_options{}; provider_options.do_copy_in_default_stream = true; + provider_options.use_tf32 = false; provider_options.prefer_nhwc = true; if (auto factory = CudaProviderFactoryCreator::Create(&provider_options)) return factory->CreateProvider(); diff --git a/orttraining/orttraining/test/gradient/optimizer_ops_test.cc b/orttraining/orttraining/test/gradient/optimizer_ops_test.cc index bfb59f1525e47..84de4b5ef7ff0 100644 --- a/orttraining/orttraining/test/gradient/optimizer_ops_test.cc +++ b/orttraining/orttraining/test/gradient/optimizer_ops_test.cc @@ -144,6 +144,8 @@ TEST(OptimizerTest, AdamBiasCorrection) { test.AddOutput("Moment_2_Out", {3}, {1.7400e-04f, 8.9966e-04f, 1.5102e-03f}); test.AddOutput("W_Out", {3}, {-1.4634f, -0.6416f, -1.2121f}); + test.SetOutputTolerance(0.0001f, 0.0001f); + test.AddAttribute("do_bias_correction", static_cast(1)); test.AddAttribute("weight_decay_mode", static_cast(0)); @@ -167,6 +169,8 @@ TEST(OptimizerTest, AdamWeightDecayMode0NoBiasCorrection) { test.AddOutput("W_Out", {3}, {-3.6210f, -2.8075f, -3.3723f}); test.AddOutput("G_Out", {3}, {-3.1576f, -3.1658f, -3.1601f}); + test.SetOutputTolerance(0.0001f, 0.0001f); + test.AddAttribute("do_bias_correction", static_cast(0)); test.AddAttribute("lambda", 0.01f); test.AddAttribute("weight_decay_mode", static_cast(0)); @@ -191,6 +195,8 @@ TEST(OptimizerTest, AdamWeightDecayMode0WithBiasCorrection) { test.AddOutput("W_Out", {3}, {-1.4587f, -0.6452f, -1.2099f}); test.AddOutput("G_Out", {3}, {-0.9954f, -1.0036f, -0.9979f}); + test.SetOutputTolerance(0.0001f, 0.0001f); + test.AddAttribute("do_bias_correction", static_cast(1)); test.AddAttribute("lambda", 0.01f); test.AddAttribute("weight_decay_mode", static_cast(0)); @@ -214,6 +220,8 @@ TEST(OptimizerTest, AdamWeightDecayMode1NoBiasCorrection) { test.AddOutput("Moment_2_Out", {3}, {1.7400e-04f, 8.9966e-04f, 1.5102e-03f}); test.AddOutput("W_Out", {3}, {-3.5894f, -2.7758f, -3.3406f}); + test.SetOutputTolerance(0.0001f, 0.0001f); + test.AddAttribute("do_bias_correction", static_cast(0)); test.AddAttribute("lambda", 0.01f); test.AddAttribute("weight_decay_mode", static_cast(1)); @@ -237,6 +245,8 @@ TEST(OptimizerTest, AdamWeightDecayMode1WithBiasCorrection) { test.AddOutput("Moment_2_Out", {3}, {1.7400e-04f, 8.9966e-04f, 1.5102e-03f}); test.AddOutput("W_Out", {3}, {-1.4488f, -0.6352f, -1.1999f}); + test.SetOutputTolerance(0.0001f, 0.0001f); + test.AddAttribute("do_bias_correction", static_cast(1)); test.AddAttribute("lambda", 0.01f); test.AddAttribute("weight_decay_mode", static_cast(1)); @@ -368,6 +378,11 @@ TEST(OptimizerTest, AdamOptimizerMixPrecision_FP16Weight_ClipNorm_Test) { test.AddOptionalOutputEdge(); test.AddOutput("FP16_W_Out", {3}, w_new_half); + test.SetOutputAbsErr("Moment_1_Out", 0.005f); + test.SetOutputAbsErr("Moment_2_Out", 0.005f); + test.SetOutputAbsErr("W_Out", 0.001f); + test.SetOutputAbsErr("FP16_W_Out", 0.005f); + test.AddAttribute("do_bias_correction", static_cast(0)); test.AddAttribute("weight_decay_mode", static_cast(0)); test.AddAttribute("max_norm_clip", 0.001f); diff --git a/orttraining/orttraining/test/training_ops/cpu/nn/batchnorm_internal_test.cc b/orttraining/orttraining/test/training_ops/cpu/nn/batchnorm_internal_test.cc index e9795a24681cb..3e0d35ab17c9b 100644 --- a/orttraining/orttraining/test/training_ops/cpu/nn/batchnorm_internal_test.cc +++ b/orttraining/orttraining/test/training_ops/cpu/nn/batchnorm_internal_test.cc @@ -37,6 +37,8 @@ TEST(BatchNormInternalTest, ForwardTrainingTest) { test.AddOutput("saved_mean", channel_dims, {-0.306f, 0.114562f}); test.AddOutput("saved_inv_std", channel_dims, {1.2288f, 0.861317f}); + test.SetOutputTolerance(0.0001f, 0.0001f); + std::vector> execution_providers; execution_providers.emplace_back(DefaultCpuExecutionProvider());