From 4d9daa884d38efd053de86879a56229d4eb143a1 Mon Sep 17 00:00:00 2001 From: Tianlei Wu Date: Wed, 6 Mar 2024 17:47:17 -0800 Subject: [PATCH] Update tolerance of provider tests to fix flaky tests (#19792) ### Description Check that float/double/float16/bfloat16 tensors are close in the same way as [numpy.isclose](https://numpy.org/doc/stable/reference/generated/numpy.isclose.html): ``` absolute(a - b) <= (atol + rtol * absolute(b)) ``` Here `a` is the actual value and `b` is the expected value. The default tolerance thresholds are: - float: atol=1e-5 and rtol=1e-4 - float16: atol=0.0025 and rtol=0.001 - bfloat16: atol=0.02 and rtol=0.01 ### Motivation and Context The current pipeline fails frequently because only a relative tolerance is used in https://github.com/microsoft/onnxruntime/pull/19608: [ RUN ] MatMulIntegerToFloat.NoZeroPoint_NoBias_test_U8S8 1: C:\a\_work\1\s\onnxruntime\test\providers\checkers.cc(272): error: The difference between cur_expected[i] and cur_actual[i] is 1.3113021850585938e-06, which exceeds *(params.relative_error) * std::abs(cur_expected[i]), where 1: cur_expected[i] evaluates to -1.3113021850585938e-06, 1: cur_actual[i] evaluates to 0, and 1: *(params.relative_error) * std::abs(cur_expected[i]) evaluates to 2.6226043559063328e-08. It is not reasonable to use only a relative tolerance for a small value very close to 0. Combining the relative tolerance with a positive absolute tolerance avoids such issues. 
--- .../matmul_integer_to_float_test.cc | 1 + onnxruntime/test/providers/checkers.cc | 159 +++++++++--------- 2 files changed, 83 insertions(+), 77 deletions(-) diff --git a/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc b/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc index 6f3ca7e239671..72a5ba4dcefbf 100644 --- a/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc +++ b/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc @@ -127,6 +127,7 @@ void TestMatMulIntegerToFloat(bool is_matrix_b_constant, if (std::is_same_v) { test.AddOutput("Y", {M, N}, Y_data); + test.SetOutputAbsErr("Y", 0.0001f); test.SetOutputRelErr("Y", 0.02f); } else { test.AddOutput("Y", {M, N}, ToFloat16(Y_data)); diff --git a/onnxruntime/test/providers/checkers.cc b/onnxruntime/test/providers/checkers.cc index 85ccb8f175f62..c97e6d9de4911 100644 --- a/onnxruntime/test/providers/checkers.cc +++ b/onnxruntime/test/providers/checkers.cc @@ -14,6 +14,54 @@ namespace onnxruntime { namespace test { namespace { + +template +struct DefaultTolerance; + +template <> +struct DefaultTolerance { + static constexpr float absolute = 1e-6f; + static constexpr float relative = 1e-5f; +}; + +template <> +struct DefaultTolerance { + static constexpr float absolute = 1e-5f; + static constexpr float relative = 1e-4f; +}; + +template <> +struct DefaultTolerance { + // The thresholds are estimated with PyTorch script like the following: + // x = torch.rand(1000, 1000) + // absolute = ((x + 1e-6).to(torch.float16) - x).abs().max() * 10 + // x[abs(x) < absolute] = absolute + // relative = ((x - x.to(torch.float16)) / x).abs().max() * 2 + static constexpr float absolute = 0.0025f; + static constexpr float relative = 0.001f; +}; + +template <> +struct DefaultTolerance { + static constexpr float absolute = 0.02f; + static constexpr float relative = 0.01f; +}; + +template +T get_tolerance(float absolute, float relative, T expected_value) { + 
static_assert(std::is_floating_point::value, "T must be a floating point type"); + + // The formula is similar to numpy.isclose: https://numpy.org/doc/stable/reference/generated/numpy.isclose.html + return static_cast(absolute) + static_cast(relative) * std::abs(expected_value); +} + +template // D is the original data type +T get_tolerance(const ValidateOutputParams& params, T expected_value) { + float absolute = (params.absolute_error.has_value() ? *(params.absolute_error) : DefaultTolerance::absolute); + float relative = (params.relative_error.has_value() ? *(params.relative_error) : DefaultTolerance::relative); + return get_tolerance(absolute, relative, expected_value); +} + template Tensor copy_sort(const Tensor& src, const AllocatorPtr& allocator) { Tensor result(src.DataType(), src.Shape(), allocator); @@ -67,7 +115,7 @@ struct TensorCheck { cur_actual = actual.Data(); } - for (int i = 0; i < size; ++i) { + for (int64_t i = 0; i < size; ++i) { EXPECT_EQ(cur_expected[i], cur_actual[i]) << "i:" << i; } } @@ -111,7 +159,7 @@ struct TensorCheck { double threshold = has_abs_err ? 
*(params.absolute_error) : 0.0; - for (int i = 0; i < size; ++i) { + for (int64_t i = 0; i < size; ++i) { if (has_rel_err) { EXPECT_NEAR(cur_expected[i], cur_actual[i], *(params.relative_error) * cur_expected[i]) // expected[i] is unsigned, can't be negative @@ -121,7 +169,7 @@ struct TensorCheck { } } } else { - for (int i = 0; i < size; ++i) { + for (int64_t i = 0; i < size; ++i) { EXPECT_EQ(cur_expected[i], cur_actual[i]) << "i:" << i; } } @@ -157,11 +205,11 @@ struct TensorCheck { if (has_abs_err) { double threshold = *(params.absolute_error); - for (int i = 0; i < size; ++i) { + for (int64_t i = 0; i < size; ++i) { EXPECT_NEAR(cur_expected[i], cur_actual[i], threshold) << "i:" << i; } } else { - for (int i = 0; i < size; ++i) { + for (int64_t i = 0; i < size; ++i) { EXPECT_EQ(cur_expected[i], cur_actual[i]) << "i:" << i; } } @@ -176,8 +224,7 @@ struct TensorCheck { const std::string& /*provider_type*/) const { auto size = actual.Shape().Size(); - bool has_abs_err = params.absolute_error.has_value(); - bool has_rel_err = params.relative_error.has_value(); + const bool has_tolerance = params.absolute_error.has_value() || params.relative_error.has_value(); // deal with rare cases in which order of output data from a kernel MAY be // undefined @@ -198,7 +245,7 @@ struct TensorCheck { threshold = 0.005; #endif - for (int i = 0; i < size; ++i) { + for (int64_t i = 0; i < size; ++i) { // NOTE: Check isnan first to work around MSVC linker bug when /LTCG:incremental is specified. // If the isinf check is first the isnan check and branch gets omitted if (std::isnan(cur_expected[i])) { @@ -206,44 +253,33 @@ struct TensorCheck { } else if (std::isinf(cur_expected[i])) { // Test infinity for equality EXPECT_EQ(cur_expected[i], cur_actual[i]) << "Expected infinity. 
i:" << i; } else { - if (!has_abs_err && !has_rel_err) { - // the default for existing tests - EXPECT_NEAR(cur_expected[i], cur_actual[i], threshold) << "i:" << i; - } else { - if (has_abs_err) { - EXPECT_NEAR(cur_expected[i], cur_actual[i], *(params.absolute_error)) << "i:" << i; - } - if (has_rel_err) { - EXPECT_NEAR(cur_expected[i], cur_actual[i], *(params.relative_error) * std::abs(cur_expected[i])) - << "i:" << i; - } - } + double tolerance = has_tolerance ? get_tolerance(params, cur_expected[i]) : threshold; + EXPECT_NEAR(cur_expected[i], cur_actual[i], tolerance) << "i:" << i; } } } }; -template +template void InternalNumericalCheck(const Tensor& expected, const Tensor& actual, const ValidateOutputParams& params, const std::string& /*provider_type*/) { - const bool has_abs_err = params.absolute_error.has_value(); - const bool has_rel_err = params.relative_error.has_value(); + const bool has_tolerance = params.absolute_error.has_value() || params.relative_error.has_value(); // deal with rare cases in which order of output data from a kernel MAY be // undefined Tensor expected_sorted, actual_sorted; - const TypeToCheck* cur_expected; - const TypeToCheck* cur_actual; + const T* cur_expected; + const T* cur_actual; auto size = actual.Shape().Size(); if (params.sort_output) { - sort_expected_and_actual_buffers(expected, expected_sorted, actual, actual_sorted); - cur_expected = expected_sorted.Data(); - cur_actual = actual_sorted.Data(); + sort_expected_and_actual_buffers(expected, expected_sorted, actual, actual_sorted); + cur_expected = expected_sorted.Data(); + cur_actual = actual_sorted.Data(); } else { - cur_expected = expected.Data(); - cur_actual = actual.Data(); + cur_expected = expected.Data(); + cur_actual = actual.Data(); } #if defined(USE_CUDA) || defined(USE_ROCM) || defined(USE_DML) @@ -252,7 +288,7 @@ void InternalNumericalCheck(const Tensor& expected, constexpr float threshold = 0.0001f; #endif - for (int i = 0; i < size; ++i) { + for (int64_t i = 
0; i < size; ++i) { // NOTE: Check isnan first to work around MSVC linker bug when /LTCG:incremental is specified. // If the isinf check is first the isnan check and branch gets omitted if (std::isnan(cur_expected[i])) { @@ -260,19 +296,8 @@ void InternalNumericalCheck(const Tensor& expected, } else if (std::isinf(cur_expected[i])) { // Test infinity for equality EXPECT_EQ(cur_expected[i], cur_actual[i]) << "Expected infinity. i:" << i; } else { - if (!has_abs_err && !has_rel_err) { - // the default for existing tests - EXPECT_NEAR(cur_expected[i], cur_actual[i], threshold) << "i:" << i; - } else { - if (has_abs_err) { - EXPECT_NEAR(cur_expected[i], cur_actual[i], *(params.absolute_error)) - << "i:" << i; - } - if (has_rel_err) { - EXPECT_NEAR(cur_expected[i], cur_actual[i], *(params.relative_error) * std::abs(cur_expected[i])) - << "i:" << i; - } - } + T tolerance = has_tolerance ? get_tolerance(params, cur_expected[i]) : threshold; + EXPECT_NEAR(cur_expected[i], cur_actual[i], tolerance) << "i:" << i; } } } @@ -308,8 +333,7 @@ struct TensorCheck { sort_expected_and_actual_buffers(f_expected, f_actual); } - const bool has_abs_err = params.absolute_error.has_value(); - const bool has_rel_err = params.relative_error.has_value(); + const bool has_tolerance = params.absolute_error.has_value() || params.relative_error.has_value(); float threshold = 0.001f; #if defined(USE_TENSORRT) || defined(ENABLE_TRAINING_CORE) || defined(USE_CUDA) || defined(USE_ROCM) @@ -317,25 +341,14 @@ struct TensorCheck { #elif defined(USE_DML) threshold = 0.02f; #endif - for (int i = 0; i < size; ++i) { + for (int64_t i = 0; i < size; ++i) { if (std::isnan(f_expected[i])) { EXPECT_TRUE(std::isnan(f_expected[i])) << "Expected NaN. i:" << i; } else if (std::isinf(f_expected[i])) { // Test infinity for equality EXPECT_EQ(f_expected[i], f_actual[i]) << "Expected infinity. 
i:" << i; } else { - if (!has_abs_err && !has_rel_err) { - // the default for existing tests - EXPECT_NEAR(f_expected[i], f_actual[i], threshold) << "i:" << i; - } else { - if (has_abs_err) { - EXPECT_NEAR(f_expected[i], f_actual[i], *(params.absolute_error)) - << "i:" << i; - } - if (has_rel_err) { - EXPECT_NEAR(f_expected[i], f_actual[i], *(params.relative_error) * std::abs(static_cast(cur_expected[i]))) - << "i:" << i; - } - } + float tolerance = has_tolerance ? get_tolerance(params, f_expected[i]) : threshold; + EXPECT_NEAR(f_expected[i], f_actual[i], tolerance) << "i:" << i; } } } @@ -362,32 +375,24 @@ struct TensorCheck { sort_expected_and_actual_buffers(f_expected, f_actual); } - /// XXX: May need to adjust threshold as BFloat is coarse + const bool has_tolerance = params.absolute_error.has_value() || params.relative_error.has_value(); + float abs_threshold = 0.0001f; - float threshold = 0.001f; + float rel_threshold = 0.001f; #if defined(USE_TENSORRT) || defined(ENABLE_TRAINING_CORE) || defined(USE_CUDA) || defined(USE_ROCM) || defined(USE_DML) || defined(USE_DNNL) - threshold = 0.05f; // expect at least 95% close + rel_threshold = 0.05f; // expect at least 95% close #endif - for (int i = 0; i < size; ++i) { + for (int64_t i = 0; i < size; ++i) { if (std::isnan(f_expected[i])) { EXPECT_TRUE(std::isnan(f_expected[i])) << "Expected NaN. i:" << i; } else if (std::isinf(f_expected[i])) { // Test infinity for equality EXPECT_EQ(f_expected[i], f_actual[i]) << "Expected infinity. i:" << i; } else { - // the default for existing tests - const float max_value = fmax(fabs(f_expected[i]), fabs(f_actual[i])); - if (max_value != 0) { // max_value = 0 means output and expected are 0s. - const float abs_error = fabs(f_expected[i] - f_actual[i]); - if (abs_error <= abs_threshold) { - // if the absolute error is small enough, then no need to calculate realative error - EXPECT_NEAR(0, abs_error, abs_threshold); - } else { - // default for existing tests. 
- const float rel_error = abs_error / max_value; - EXPECT_NEAR(0, rel_error, threshold); - } - } + float tolerance = has_tolerance + ? get_tolerance(params, f_expected[i]) + : get_tolerance(abs_threshold, rel_threshold, f_expected[i]); + EXPECT_NEAR(f_expected[i], f_actual[i], tolerance) << "i:" << i; } } }