Skip to content

Commit

Permalink
Update tolerance of provider tests to fix flaky tests (#19792)
Browse files Browse the repository at this point in the history
### Description

Check that float/double/float16/bfloat16 tensors are close, using the same criterion as
[numpy.isclose](https://numpy.org/doc/stable/reference/generated/numpy.isclose.html):
```
absolute(a - b) <= (atol + rtol * absolute(b))
```

The default tolerance thresholds:
- double: atol=1e-6 and rtol=1e-5
- float: atol=1e-5 and rtol=1e-4
- float16: atol=0.0025 and rtol=0.001
- bfloat16: atol=0.02 and rtol=0.01

### Motivation and Context

The current pipeline fails frequently because only a relative
tolerance is used in #19608:

[ RUN      ] MatMulIntegerToFloat.NoZeroPoint_NoBias_test_U8S8
1: C:\a\_work\1\s\onnxruntime\test\providers\checkers.cc(272): error:
The difference between cur_expected[i] and cur_actual[i] is
1.3113021850585938e-06, which exceeds *(params.relative_error) *
std::abs(cur_expected[i]), where
1: cur_expected[i] evaluates to -1.3113021850585938e-06,
1: cur_actual[i] evaluates to 0, and
1: *(params.relative_error) * std::abs(cur_expected[i]) evaluates to
2.6226043559063328e-08.

It is not reasonable to use only a relative tolerance for a small value
very close to 0. Combining the relative tolerance with a positive absolute
tolerance avoids such issues.
  • Loading branch information
tianleiwu authored Mar 7, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
1 parent 5c5d6e9 commit bff4f8b
Showing 2 changed files with 83 additions and 77 deletions.
Original file line number Diff line number Diff line change
@@ -127,6 +127,7 @@ void TestMatMulIntegerToFloat(bool is_matrix_b_constant,

if (std::is_same_v<OType, float>) {
test.AddOutput<float>("Y", {M, N}, Y_data);
test.SetOutputAbsErr("Y", 0.0001f);
test.SetOutputRelErr("Y", 0.02f);
} else {
test.AddOutput<MLFloat16>("Y", {M, N}, ToFloat16(Y_data));
159 changes: 82 additions & 77 deletions onnxruntime/test/providers/checkers.cc
Original file line number Diff line number Diff line change
@@ -14,6 +14,54 @@
namespace onnxruntime {
namespace test {
namespace {

// Per-type default tolerances, looked up as DefaultTolerance<D>::absolute and
// DefaultTolerance<D>::relative. The primary template is only declared, so
// using a type without a specialization is a compile-time error.
template <typename T>
struct DefaultTolerance;

// Defaults for double tensors.
template <>
struct DefaultTolerance<double> {
  static constexpr float absolute{1e-6f};
  static constexpr float relative{1e-5f};
};

// Defaults for float tensors.
template <>
struct DefaultTolerance<float> {
  static constexpr float absolute{1e-5f};
  static constexpr float relative{1e-4f};
};

// Defaults for MLFloat16 tensors.
//
// The values were estimated with a PyTorch script along these lines:
//   x = torch.rand(1000, 1000)
//   absolute = ((x + 1e-6).to(torch.float16) - x).abs().max() * 10
//   x[abs(x) < absolute] = absolute
//   relative = ((x - x.to(torch.float16)) / x).abs().max() * 2
template <>
struct DefaultTolerance<MLFloat16> {
  static constexpr float absolute{0.0025f};
  static constexpr float relative{0.001f};
};

// Defaults for BFloat16 tensors.
template <>
struct DefaultTolerance<BFloat16> {
  static constexpr float absolute{0.02f};
  static constexpr float relative{0.01f};
};

// Computes the allowed deviation for one expected element:
//   absolute + relative * |expected_value|
// which is the right-hand side of the numpy.isclose criterion
// (https://numpy.org/doc/stable/reference/generated/numpy.isclose.html).
//
// T must be a floating point type. Both thresholds are converted to T before
// the arithmetic, and the result is returned as T.
template <typename T>
T get_tolerance(float absolute, float relative, T expected_value) {
  static_assert(std::is_floating_point_v<T>, "T must be a floating point type");

  const T atol = static_cast<T>(absolute);
  const T rtol = static_cast<T>(relative);
  return atol + rtol * std::abs(expected_value);
}

// Resolves the thresholds for a check and returns the resulting tolerance for
// `expected_value`.
//
// T is the type the comparison is carried out in; D is the original data type
// and selects which DefaultTolerance specialization supplies the fallbacks.
// A threshold set in `params` takes precedence; an unset one falls back to the
// DefaultTolerance<D> value, independently for absolute and relative.
template <typename T, typename D>
T get_tolerance(const ValidateOutputParams& params, T expected_value) {
  float atol = DefaultTolerance<D>::absolute;
  if (params.absolute_error.has_value()) {
    atol = *(params.absolute_error);
  }

  float rtol = DefaultTolerance<D>::relative;
  if (params.relative_error.has_value()) {
    rtol = *(params.relative_error);
  }

  return get_tolerance<T>(atol, rtol, expected_value);
}

template <typename T>
Tensor copy_sort(const Tensor& src, const AllocatorPtr& allocator) {
Tensor result(src.DataType(), src.Shape(), allocator);
@@ -67,7 +115,7 @@ struct TensorCheck {
cur_actual = actual.Data<T>();
}

for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
EXPECT_EQ(cur_expected[i], cur_actual[i]) << "i:" << i;
}
}
@@ -111,7 +159,7 @@ struct TensorCheck<uint8_t> {
double threshold = has_abs_err ? *(params.absolute_error)
: 0.0;

for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
if (has_rel_err) {
EXPECT_NEAR(cur_expected[i], cur_actual[i],
*(params.relative_error) * cur_expected[i]) // expected[i] is unsigned, can't be negative
@@ -121,7 +169,7 @@ struct TensorCheck<uint8_t> {
}
}
} else {
for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
EXPECT_EQ(cur_expected[i], cur_actual[i]) << "i:" << i;
}
}
@@ -157,11 +205,11 @@ struct TensorCheck<int8_t> {
if (has_abs_err) {
double threshold = *(params.absolute_error);

for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
EXPECT_NEAR(cur_expected[i], cur_actual[i], threshold) << "i:" << i;
}
} else {
for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
EXPECT_EQ(cur_expected[i], cur_actual[i]) << "i:" << i;
}
}
@@ -176,8 +224,7 @@ struct TensorCheck<double> {
const std::string& /*provider_type*/) const {
auto size = actual.Shape().Size();

bool has_abs_err = params.absolute_error.has_value();
bool has_rel_err = params.relative_error.has_value();
const bool has_tolerance = params.absolute_error.has_value() || params.relative_error.has_value();

// deal with rare cases in which order of output data from a kernel MAY be
// undefined
@@ -198,52 +245,41 @@ struct TensorCheck<double> {
threshold = 0.005;
#endif

for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
// NOTE: Check isnan first to work around MSVC linker bug when /LTCG:incremental is specified.
// If the isinf check is first the isnan check and branch gets omitted
if (std::isnan(cur_expected[i])) {
EXPECT_TRUE(std::isnan(cur_actual[i])) << "Expected NaN. i:" << i;
} else if (std::isinf(cur_expected[i])) { // Test infinity for equality
EXPECT_EQ(cur_expected[i], cur_actual[i]) << "Expected infinity. i:" << i;
} else {
if (!has_abs_err && !has_rel_err) {
// the default for existing tests
EXPECT_NEAR(cur_expected[i], cur_actual[i], threshold) << "i:" << i;
} else {
if (has_abs_err) {
EXPECT_NEAR(cur_expected[i], cur_actual[i], *(params.absolute_error)) << "i:" << i;
}
if (has_rel_err) {
EXPECT_NEAR(cur_expected[i], cur_actual[i], *(params.relative_error) * std::abs(cur_expected[i]))
<< "i:" << i;
}
}
double tolerance = has_tolerance ? get_tolerance<double, double>(params, cur_expected[i]) : threshold;
EXPECT_NEAR(cur_expected[i], cur_actual[i], tolerance) << "i:" << i;
}
}
}
};

template <typename TypeToCheck>
template <typename T>
void InternalNumericalCheck(const Tensor& expected,
const Tensor& actual,
const ValidateOutputParams& params,
const std::string& /*provider_type*/) {
const bool has_abs_err = params.absolute_error.has_value();
const bool has_rel_err = params.relative_error.has_value();
const bool has_tolerance = params.absolute_error.has_value() || params.relative_error.has_value();

// deal with rare cases in which order of output data from a kernel MAY be
// undefined
Tensor expected_sorted, actual_sorted;
const TypeToCheck* cur_expected;
const TypeToCheck* cur_actual;
const T* cur_expected;
const T* cur_actual;
auto size = actual.Shape().Size();
if (params.sort_output) {
sort_expected_and_actual_buffers<TypeToCheck>(expected, expected_sorted, actual, actual_sorted);
cur_expected = expected_sorted.Data<TypeToCheck>();
cur_actual = actual_sorted.Data<TypeToCheck>();
sort_expected_and_actual_buffers<T>(expected, expected_sorted, actual, actual_sorted);
cur_expected = expected_sorted.Data<T>();
cur_actual = actual_sorted.Data<T>();
} else {
cur_expected = expected.Data<TypeToCheck>();
cur_actual = actual.Data<TypeToCheck>();
cur_expected = expected.Data<T>();
cur_actual = actual.Data<T>();
}

#if defined(USE_CUDA) || defined(USE_ROCM) || defined(USE_DML)
@@ -252,27 +288,16 @@ void InternalNumericalCheck(const Tensor& expected,
constexpr float threshold = 0.0001f;
#endif

for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
// NOTE: Check isnan first to work around MSVC linker bug when /LTCG:incremental is specified.
// If the isinf check is first the isnan check and branch gets omitted
if (std::isnan(cur_expected[i])) {
EXPECT_TRUE(std::isnan(cur_actual[i])) << "Expected NaN. i:" << i;
} else if (std::isinf(cur_expected[i])) { // Test infinity for equality
EXPECT_EQ(cur_expected[i], cur_actual[i]) << "Expected infinity. i:" << i;
} else {
if (!has_abs_err && !has_rel_err) {
// the default for existing tests
EXPECT_NEAR(cur_expected[i], cur_actual[i], threshold) << "i:" << i;
} else {
if (has_abs_err) {
EXPECT_NEAR(cur_expected[i], cur_actual[i], *(params.absolute_error))
<< "i:" << i;
}
if (has_rel_err) {
EXPECT_NEAR(cur_expected[i], cur_actual[i], *(params.relative_error) * std::abs(cur_expected[i]))
<< "i:" << i;
}
}
T tolerance = has_tolerance ? get_tolerance<T, T>(params, cur_expected[i]) : threshold;
EXPECT_NEAR(cur_expected[i], cur_actual[i], tolerance) << "i:" << i;
}
}
}
@@ -308,34 +333,22 @@ struct TensorCheck<MLFloat16> {
sort_expected_and_actual_buffers<float>(f_expected, f_actual);
}

const bool has_abs_err = params.absolute_error.has_value();
const bool has_rel_err = params.relative_error.has_value();
const bool has_tolerance = params.absolute_error.has_value() || params.relative_error.has_value();

float threshold = 0.001f;
#if defined(USE_TENSORRT) || defined(ENABLE_TRAINING_CORE) || defined(USE_CUDA) || defined(USE_ROCM)
threshold = 0.005f;
#elif defined(USE_DML)
threshold = 0.02f;
#endif
for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
if (std::isnan(f_expected[i])) {
EXPECT_TRUE(std::isnan(f_expected[i])) << "Expected NaN. i:" << i;
} else if (std::isinf(f_expected[i])) { // Test infinity for equality
EXPECT_EQ(f_expected[i], f_actual[i]) << "Expected infinity. i:" << i;
} else {
if (!has_abs_err && !has_rel_err) {
// the default for existing tests
EXPECT_NEAR(f_expected[i], f_actual[i], threshold) << "i:" << i;
} else {
if (has_abs_err) {
EXPECT_NEAR(f_expected[i], f_actual[i], *(params.absolute_error))
<< "i:" << i;
}
if (has_rel_err) {
EXPECT_NEAR(f_expected[i], f_actual[i], *(params.relative_error) * std::abs(static_cast<float>(cur_expected[i])))
<< "i:" << i;
}
}
float tolerance = has_tolerance ? get_tolerance<float, MLFloat16>(params, f_expected[i]) : threshold;
EXPECT_NEAR(f_expected[i], f_actual[i], tolerance) << "i:" << i;
}
}
}
@@ -362,32 +375,24 @@ struct TensorCheck<BFloat16> {
sort_expected_and_actual_buffers<float>(f_expected, f_actual);
}

/// XXX: May need to adjust threshold as BFloat is coarse
const bool has_tolerance = params.absolute_error.has_value() || params.relative_error.has_value();

float abs_threshold = 0.0001f;
float threshold = 0.001f;
float rel_threshold = 0.001f;
#if defined(USE_TENSORRT) || defined(ENABLE_TRAINING_CORE) || defined(USE_CUDA) || defined(USE_ROCM) || defined(USE_DML) || defined(USE_DNNL)
threshold = 0.05f; // expect at least 95% close
rel_threshold = 0.05f; // expect at least 95% close
#endif

for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
if (std::isnan(f_expected[i])) {
EXPECT_TRUE(std::isnan(f_expected[i])) << "Expected NaN. i:" << i;
} else if (std::isinf(f_expected[i])) { // Test infinity for equality
EXPECT_EQ(f_expected[i], f_actual[i]) << "Expected infinity. i:" << i;
} else {
// the default for existing tests
const float max_value = fmax(fabs(f_expected[i]), fabs(f_actual[i]));
if (max_value != 0) { // max_value = 0 means output and expected are 0s.
const float abs_error = fabs(f_expected[i] - f_actual[i]);
if (abs_error <= abs_threshold) {
// if the absolute error is small enough, then no need to calculate realative error
EXPECT_NEAR(0, abs_error, abs_threshold);
} else {
// default for existing tests.
const float rel_error = abs_error / max_value;
EXPECT_NEAR(0, rel_error, threshold);
}
}
float tolerance = has_tolerance
? get_tolerance<float, BFloat16>(params, f_expected[i])
: get_tolerance<float>(abs_threshold, rel_threshold, f_expected[i]);
EXPECT_NEAR(f_expected[i], f_actual[i], tolerance) << "i:" << i;
}
}
}

0 comments on commit bff4f8b

Please sign in to comment.