Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update tolerance of provider tests to fix flaky tests #19792

Merged
merged 6 commits into from
Mar 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ void TestMatMulIntegerToFloat(bool is_matrix_b_constant,

if (std::is_same_v<OType, float>) {
test.AddOutput<float>("Y", {M, N}, Y_data);
test.SetOutputAbsErr("Y", 0.0001f);
test.SetOutputRelErr("Y", 0.02f);
} else {
test.AddOutput<MLFloat16>("Y", {M, N}, ToFloat16(Y_data));
Expand Down
159 changes: 82 additions & 77 deletions onnxruntime/test/providers/checkers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,54 @@
namespace onnxruntime {
namespace test {
namespace {

// Per-element-type default tolerances, looked up by get_tolerance() below when
// the test's ValidateOutputParams leaves absolute_error / relative_error unset.
// Only the declaration of the primary template is given here; the values come
// from the explicit specializations.
template <class T>
struct DefaultTolerance;

// Defaults for tensors of double.
template <>
struct DefaultTolerance<double> {
  static constexpr float absolute{1e-6f};
  static constexpr float relative{1e-5f};
};

// Defaults for tensors of float.
template <>
struct DefaultTolerance<float> {
  static constexpr float absolute{1e-5f};
  static constexpr float relative{1e-4f};
};

// Defaults for tensors of MLFloat16.
template <>
struct DefaultTolerance<MLFloat16> {
  // These values were estimated with a PyTorch script along these lines:
  //   x = torch.rand(1000, 1000)
  //   absolute = ((x + 1e-6).to(torch.float16) - x).abs().max() * 10
  //   x[abs(x) < absolute] = absolute
  //   relative = ((x - x.to(torch.float16)) / x).abs().max() * 2
  static constexpr float absolute{0.0025f};
  static constexpr float relative{0.001f};
};

// Defaults for tensors of BFloat16.
template <>
struct DefaultTolerance<BFloat16> {
  static constexpr float absolute{0.02f};
  static constexpr float relative{0.01f};
};

// Computes the allowed deviation for one expected element:
//   absolute + relative * |expected_value|
// This mirrors the tolerance term of numpy.isclose:
// https://numpy.org/doc/stable/reference/generated/numpy.isclose.html
//
// T must be a floating point type; both float inputs are converted to T
// before the arithmetic is carried out.
template <typename T>
T get_tolerance(float absolute, float relative, T expected_value) {
  static_assert(std::is_floating_point_v<T>, "T must be a floating point type");

  const T magnitude = std::abs(expected_value);
  return static_cast<T>(absolute) + static_cast<T>(relative) * magnitude;
}

// Resolves the tolerance for one expected element from the test parameters.
//   T - type the comparison is carried out in.
//   D - original tensor element type; selects the DefaultTolerance<D> entry
//       used for whichever of absolute_error / relative_error is not set.
// An explicitly set value in params takes precedence over the default.
template <typename T, typename D>
T get_tolerance(const ValidateOutputParams& params, T expected_value) {
  const float abs_tol = params.absolute_error.value_or(DefaultTolerance<D>::absolute);
  const float rel_tol = params.relative_error.value_or(DefaultTolerance<D>::relative);
  return get_tolerance<T>(abs_tol, rel_tol, expected_value);
}

template <typename T>
Tensor copy_sort(const Tensor& src, const AllocatorPtr& allocator) {
Tensor result(src.DataType(), src.Shape(), allocator);
Expand Down Expand Up @@ -67,7 +115,7 @@ struct TensorCheck {
cur_actual = actual.Data<T>();
}

for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
EXPECT_EQ(cur_expected[i], cur_actual[i]) << "i:" << i;
}
}
Expand Down Expand Up @@ -111,7 +159,7 @@ struct TensorCheck<uint8_t> {
double threshold = has_abs_err ? *(params.absolute_error)
: 0.0;

for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
if (has_rel_err) {
EXPECT_NEAR(cur_expected[i], cur_actual[i],
*(params.relative_error) * cur_expected[i]) // expected[i] is unsigned, can't be negative
Expand All @@ -121,7 +169,7 @@ struct TensorCheck<uint8_t> {
}
}
} else {
for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
EXPECT_EQ(cur_expected[i], cur_actual[i]) << "i:" << i;
}
}
Expand Down Expand Up @@ -157,11 +205,11 @@ struct TensorCheck<int8_t> {
if (has_abs_err) {
double threshold = *(params.absolute_error);

for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
EXPECT_NEAR(cur_expected[i], cur_actual[i], threshold) << "i:" << i;
}
} else {
for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
EXPECT_EQ(cur_expected[i], cur_actual[i]) << "i:" << i;
}
}
Expand All @@ -176,8 +224,7 @@ struct TensorCheck<double> {
const std::string& /*provider_type*/) const {
auto size = actual.Shape().Size();

bool has_abs_err = params.absolute_error.has_value();
bool has_rel_err = params.relative_error.has_value();
const bool has_tolerance = params.absolute_error.has_value() || params.relative_error.has_value();

// deal with rare cases in which order of output data from a kernel MAY be
// undefined
Expand All @@ -198,52 +245,41 @@ struct TensorCheck<double> {
threshold = 0.005;
#endif

for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
// NOTE: Check isnan first to work around MSVC linker bug when /LTCG:incremental is specified.
// If the isinf check is first the isnan check and branch gets omitted
if (std::isnan(cur_expected[i])) {
EXPECT_TRUE(std::isnan(cur_actual[i])) << "Expected NaN. i:" << i;
} else if (std::isinf(cur_expected[i])) { // Test infinity for equality
EXPECT_EQ(cur_expected[i], cur_actual[i]) << "Expected infinity. i:" << i;
} else {
if (!has_abs_err && !has_rel_err) {
// the default for existing tests
EXPECT_NEAR(cur_expected[i], cur_actual[i], threshold) << "i:" << i;
} else {
if (has_abs_err) {
EXPECT_NEAR(cur_expected[i], cur_actual[i], *(params.absolute_error)) << "i:" << i;
}
if (has_rel_err) {
EXPECT_NEAR(cur_expected[i], cur_actual[i], *(params.relative_error) * std::abs(cur_expected[i]))
<< "i:" << i;
}
}
double tolerance = has_tolerance ? get_tolerance<double, double>(params, cur_expected[i]) : threshold;
EXPECT_NEAR(cur_expected[i], cur_actual[i], tolerance) << "i:" << i;
}
}
}
};

template <typename TypeToCheck>
template <typename T>
void InternalNumericalCheck(const Tensor& expected,
const Tensor& actual,
const ValidateOutputParams& params,
const std::string& /*provider_type*/) {
const bool has_abs_err = params.absolute_error.has_value();
const bool has_rel_err = params.relative_error.has_value();
const bool has_tolerance = params.absolute_error.has_value() || params.relative_error.has_value();

// deal with rare cases in which order of output data from a kernel MAY be
// undefined
Tensor expected_sorted, actual_sorted;
const TypeToCheck* cur_expected;
const TypeToCheck* cur_actual;
const T* cur_expected;
const T* cur_actual;
auto size = actual.Shape().Size();
if (params.sort_output) {
sort_expected_and_actual_buffers<TypeToCheck>(expected, expected_sorted, actual, actual_sorted);
cur_expected = expected_sorted.Data<TypeToCheck>();
cur_actual = actual_sorted.Data<TypeToCheck>();
sort_expected_and_actual_buffers<T>(expected, expected_sorted, actual, actual_sorted);
cur_expected = expected_sorted.Data<T>();
cur_actual = actual_sorted.Data<T>();
} else {
cur_expected = expected.Data<TypeToCheck>();
cur_actual = actual.Data<TypeToCheck>();
cur_expected = expected.Data<T>();
cur_actual = actual.Data<T>();
}

#if defined(USE_CUDA) || defined(USE_ROCM) || defined(USE_DML)
Expand All @@ -252,27 +288,16 @@ void InternalNumericalCheck(const Tensor& expected,
constexpr float threshold = 0.0001f;
#endif

for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
// NOTE: Check isnan first to work around MSVC linker bug when /LTCG:incremental is specified.
// If the isinf check is first the isnan check and branch gets omitted
if (std::isnan(cur_expected[i])) {
EXPECT_TRUE(std::isnan(cur_actual[i])) << "Expected NaN. i:" << i;
} else if (std::isinf(cur_expected[i])) { // Test infinity for equality
EXPECT_EQ(cur_expected[i], cur_actual[i]) << "Expected infinity. i:" << i;
} else {
if (!has_abs_err && !has_rel_err) {
// the default for existing tests
EXPECT_NEAR(cur_expected[i], cur_actual[i], threshold) << "i:" << i;
} else {
if (has_abs_err) {
EXPECT_NEAR(cur_expected[i], cur_actual[i], *(params.absolute_error))
<< "i:" << i;
}
if (has_rel_err) {
EXPECT_NEAR(cur_expected[i], cur_actual[i], *(params.relative_error) * std::abs(cur_expected[i]))
<< "i:" << i;
}
}
T tolerance = has_tolerance ? get_tolerance<T, T>(params, cur_expected[i]) : threshold;
EXPECT_NEAR(cur_expected[i], cur_actual[i], tolerance) << "i:" << i;
}
}
}
Expand Down Expand Up @@ -308,34 +333,22 @@ struct TensorCheck<MLFloat16> {
sort_expected_and_actual_buffers<float>(f_expected, f_actual);
}

const bool has_abs_err = params.absolute_error.has_value();
const bool has_rel_err = params.relative_error.has_value();
const bool has_tolerance = params.absolute_error.has_value() || params.relative_error.has_value();

float threshold = 0.001f;
#if defined(USE_TENSORRT) || defined(ENABLE_TRAINING_CORE) || defined(USE_CUDA) || defined(USE_ROCM)
threshold = 0.005f;
#elif defined(USE_DML)
threshold = 0.02f;
#endif
for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
if (std::isnan(f_expected[i])) {
EXPECT_TRUE(std::isnan(f_expected[i])) << "Expected NaN. i:" << i;
} else if (std::isinf(f_expected[i])) { // Test infinity for equality
EXPECT_EQ(f_expected[i], f_actual[i]) << "Expected infinity. i:" << i;
} else {
if (!has_abs_err && !has_rel_err) {
// the default for existing tests
EXPECT_NEAR(f_expected[i], f_actual[i], threshold) << "i:" << i;
} else {
if (has_abs_err) {
EXPECT_NEAR(f_expected[i], f_actual[i], *(params.absolute_error))
<< "i:" << i;
}
if (has_rel_err) {
EXPECT_NEAR(f_expected[i], f_actual[i], *(params.relative_error) * std::abs(static_cast<float>(cur_expected[i])))
<< "i:" << i;
}
}
float tolerance = has_tolerance ? get_tolerance<float, MLFloat16>(params, f_expected[i]) : threshold;
EXPECT_NEAR(f_expected[i], f_actual[i], tolerance) << "i:" << i;
}
}
}
Expand All @@ -362,32 +375,24 @@ struct TensorCheck<BFloat16> {
sort_expected_and_actual_buffers<float>(f_expected, f_actual);
}

/// XXX: May need to adjust threshold as BFloat is coarse
const bool has_tolerance = params.absolute_error.has_value() || params.relative_error.has_value();

float abs_threshold = 0.0001f;
float threshold = 0.001f;
float rel_threshold = 0.001f;
#if defined(USE_TENSORRT) || defined(ENABLE_TRAINING_CORE) || defined(USE_CUDA) || defined(USE_ROCM) || defined(USE_DML) || defined(USE_DNNL)
threshold = 0.05f; // expect at least 95% close
rel_threshold = 0.05f; // expect at least 95% close
#endif

for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
if (std::isnan(f_expected[i])) {
EXPECT_TRUE(std::isnan(f_expected[i])) << "Expected NaN. i:" << i;
} else if (std::isinf(f_expected[i])) { // Test infinity for equality
EXPECT_EQ(f_expected[i], f_actual[i]) << "Expected infinity. i:" << i;
} else {
// the default for existing tests
const float max_value = fmax(fabs(f_expected[i]), fabs(f_actual[i]));
if (max_value != 0) { // max_value = 0 means output and expected are 0s.
const float abs_error = fabs(f_expected[i] - f_actual[i]);
if (abs_error <= abs_threshold) {
// if the absolute error is small enough, then no need to calculate relative error
EXPECT_NEAR(0, abs_error, abs_threshold);
} else {
// default for existing tests.
const float rel_error = abs_error / max_value;
EXPECT_NEAR(0, rel_error, threshold);
}
}
float tolerance = has_tolerance
? get_tolerance<float, BFloat16>(params, f_expected[i])
: get_tolerance<float>(abs_threshold, rel_threshold, f_expected[i]);
EXPECT_NEAR(f_expected[i], f_actual[i], tolerance) << "i:" << i;
}
}
}
Expand Down
Loading