
Adjust test tolerance #19947

Merged
merged 10 commits on Mar 19, 2024
9 changes: 9 additions & 0 deletions onnxruntime/test/contrib_ops/attention_op_test.cc
@@ -227,6 +227,12 @@ static void RunAttentionTest(
tester.AddOptionalInputEdge<int32_t>();
}

+ if (use_float16) {
+   tester.SetOutputTolerance(0.005f);
+ } else {
+   tester.SetOutputTolerance(0.001f, 0.001f);
+ }
+
if (enable_cuda) {
std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
execution_providers.push_back(DefaultCudaExecutionProvider());
@@ -254,6 +260,9 @@ static void RunAttentionTest(
if (enable_dml) {
std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
execution_providers.push_back(DefaultDmlExecutionProvider());
+ if (use_float16) {
+   tester.SetOutputTolerance(0.02f);
+ }
tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
}
}
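Reviewer note: SetOutputTolerance (implemented in base_tester.cc below) overwrites the validation parameters of every output added so far, so the DML-specific call above supersedes the tolerance chosen at the top of the function; for fp16 the bound is loosened from 0.005f to 0.02f before Run executes for that provider.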
7 changes: 3 additions & 4 deletions onnxruntime/test/contrib_ops/decoder_attention_op_test.cc
@@ -31,10 +31,8 @@ static void RunAttentionTest(
const std::vector<float>* new_value_cache = nullptr,
const std::vector<float>* key_cache = nullptr,
const std::vector<float>* value_cache = nullptr,
- const std::initializer_list<bool>* key_padding_mask_data = nullptr,
- bool use_float16 = false) {
-   int min_cuda_architecture = use_float16 ? 530 : 0;
-   bool enable_cuda = HasCudaEnvironment(min_cuda_architecture);
+ const std::initializer_list<bool>* key_padding_mask_data = nullptr) {
+   bool enable_cuda = HasCudaEnvironment(0);
bool enable_rocm = (nullptr != DefaultRocmExecutionProvider().get());
bool enable_cpu = false;

@@ -99,6 +97,7 @@ static void RunAttentionTest(
tester.AddOutput<float>("new_key_cache", output_cache_dims, *new_key_cache);
tester.AddOutput<float>("new_value_cache", output_cache_dims, *new_value_cache);
}
+ tester.SetOutputTolerance(0.001f, 0.001f);

std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
if (enable_cuda) {
@@ -754,9 +754,10 @@ TEST(DecoderMaskedSelfAttentionTest, Test_fp32) {

// Output(s)
tester.AddOutput<float>("output", input_dims, output);

tester.AddOutput<float>("present", past_dims, present);

+ tester.SetOutputTolerance(0.001f, 0.001f);

// Run - Regular kernel execution path
{
std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
@@ -897,9 +898,10 @@ TEST(DecoderMaskedSelfAttentionTest, Test_fp16) {

// Output(s)
tester.AddOutput<MLFloat16>("output", input_dims, output);

tester.AddOutput<MLFloat16>("present", past_dims, present);

+ tester.SetOutputTolerance(0.005f);

// Run - Regular kernel execution path
{
std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
2 changes: 2 additions & 0 deletions onnxruntime/test/contrib_ops/fft_op_test.cc
@@ -25,6 +25,7 @@ TEST(ContribOpTest, Rfft) {
// Target values computed using PyTorch torch.fft.rfft(X, dim=-1, norm="backward")
test.AddInput<float>("X", {4, 4}, {0.8129f, 1.3108f, -0.8790f, -1.2046f, 0.1661f, -0.9831f, 0.5879f, 0.4918f, 1.2506f, 0.7244f, -2.6260f, -1.1268f, -1.6885f, 1.0439f, -0.2595f, 1.8780f});
test.AddOutput<float>("Y", {4, 3, 2}, {0.0400f, 0.0000f, 1.6919f, -2.5154f, -0.1722f, 0.0000f, 0.2627f, 0.0000f, -0.4218f, 1.4748f, 1.2454f, 0.0000f, -1.7779f, 0.0000f, 3.8766f, -1.8512f, -0.9730f, 0.0000f, 0.9740f, 0.0000f, -1.4290f, 0.8341f, -4.8699f, 0.0000f});
+ test.SetOutputTolerance(0.0001f);
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
}

@@ -45,6 +46,7 @@ TEST(ContribOpTest, Irfft) {
test.AddAttribute("normalized", static_cast<int64_t>(0));
test.AddInput<float>("X", {4, 3, 2}, {0.0400f, 0.0000f, 1.6919f, -2.5154f, -0.1722f, 0.0000f, 0.2627f, 0.0000f, -0.4218f, 1.4748f, 1.2454f, 0.0000f, -1.7779f, 0.0000f, 3.8766f, -1.8512f, -0.9730f, 0.0000f, 0.9740f, 0.0000f, -1.4290f, 0.8341f, -4.8699f, 0.0000f});
test.AddOutput<float>("Y", {4, 4}, {0.8129f, 1.3108f, -0.8790f, -1.2046f, 0.1661f, -0.9831f, 0.5879f, 0.4918f, 1.2506f, 0.7244f, -2.6260f, -1.1268f, -1.6885f, 1.0439f, -0.2595f, 1.8780f});
+ test.SetOutputTolerance(0.0001f);
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
}
} // namespace test
6 changes: 4 additions & 2 deletions onnxruntime/test/contrib_ops/gemm_fastgelu_op_test.cc
@@ -50,6 +50,8 @@ static void RunGemmFastGeluGpuTest(const std::vector<float>& input_data, const s
tester.AddOutput<float>("Y", output_dims, output_data);
}

+ tester.SetOutputTolerance(use_float16 ? 0.005f : 0.0025f);
+
tester.Config(run_with_tunable_op)
.RunWithConfig();
}
@@ -154,7 +156,7 @@ TEST(GemmFastGeluTest, GemmFastGeluWithoutBiasFloat16) {

RunGemmFastGeluGpuTest(input_data, weight_data, bias_data, output_data,
input_dims, weight_dims, bias_dims, output_dims,
- false);
+ false, true);
}

TEST(GemmFastGeluTest, GemmFastGeluWithBiasFloat16) {
@@ -189,7 +191,7 @@ TEST(GemmFastGeluTest, GemmFastGeluWithBiasFloat16) {

RunGemmFastGeluGpuTest(input_data, weight_data, bias_data, output_data,
input_dims, weight_dims, bias_dims, output_dims,
- true);
+ true, true);
}

TEST(GemmFastGeluTest, GemmFastGeluWithBias_bfloat16) {
1 change: 1 addition & 0 deletions onnxruntime/test/contrib_ops/gridsample_test.cc
@@ -126,6 +126,7 @@ TEST(GridsampleContribOpTest, gridsample_mode_bicubic) {
0.5000f, 0.5000f, 1.0000f, 1.0000f});
test.AddAttribute("mode", "bicubic");
test.AddOutput<float>("Y", {1, 1, 2, 4}, {-0.1406f, 0.3828f, 1.7556f, 2.9688f, 2.9688f, 1.7556f, 5.1445f, 1.3906f});
+ test.SetOutputTolerance(0.0001f);
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaNHWCExecutionProvider});
}

6 changes: 6 additions & 0 deletions onnxruntime/test/contrib_ops/layer_norm_op_test.cc
@@ -160,6 +160,7 @@ TEST(LayerNormTest, LayerNorm_Scale_Bias) {
test.AddInput<float>("gamma", {2}, {-0.6953f, 5.1824f});
test.AddInput<float>("bias", {2}, {0.6435f, -0.3964f});
test.AddOutput<float>("output", dims, {-0.0516f, -5.5776f, -0.0518f, -5.5788f, -0.0518f, -5.5788f});
+ test.SetOutputTolerance(0.0001f);
test.Run();
}

@@ -172,6 +173,8 @@ TEST(LayerNormTest, LayerNorm_Scale_Bias_Float16Input) {
test.AddInput<float>("gamma", {2}, {-0.6953f, 5.1824f});
test.AddInput<float>("bias", {2}, {0.6435f, -0.3964f});
test.AddOutput<float>("output", dims, {-0.0516f, -5.5776f, -0.0518f, -5.5788f, -0.0518f, -5.5788f});
+ test.SetOutputTolerance(0.0001f);
+
// TRT, DNNL, OpenVINO and NNAPI, CoreML don't support this combination of datatypes
test.Run(OpTester::ExpectResult::kExpectSuccess, "",
{kTensorrtExecutionProvider, kDnnlExecutionProvider, kQnnExecutionProvider,
@@ -228,6 +231,9 @@ TEST(LayerNormTest, LayerNorm17_double) {
test.AddInput<double>("x", dims, {1.0, 2.0, 3.0, 4.0, 5.0, 6.0});
test.AddInput<double>("gamma", {3}, {1.0, 1.0, 1.0});
test.AddOutput<double>("output", dims, {-1.2247, 0.0, 1.2247, -1.2247, 0.0, 1.2247});
+
+ test.SetOutputTolerance(0.0001f);
+
// DNNL does not support double
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kDnnlExecutionProvider});
}
@@ -127,7 +127,7 @@ void TestMatMulIntegerToFloat(bool is_matrix_b_constant,

if (std::is_same_v<OType, float>) {
test.AddOutput<float>("Y", {M, N}, Y_data);
- test.SetOutputAbsErr("Y", 0.0001f);
+ test.SetOutputAbsErr("Y", 0.001f);
test.SetOutputRelErr("Y", 0.02f);
} else {
test.AddOutput<MLFloat16>("Y", {M, N}, ToFloat16(Y_data));
2 changes: 2 additions & 0 deletions onnxruntime/test/contrib_ops/moe_test.cc
@@ -47,6 +47,7 @@ static void RunMoETest(
tester.AddInput<MLFloat16>("fc1_experts_bias", fc1_experts_bias_dims, ToFloat16(fc1_experts_bias));
tester.AddInput<MLFloat16>("fc2_experts_bias", fc2_experts_bias_dims, ToFloat16(fc2_experts_bias));
tester.AddOutput<MLFloat16>("output", output_dims, ToFloat16(output_data));
+ tester.SetOutputTolerance(0.005f);
} else {
tester.AddInput<float>("input", input_dims, input);
tester.AddInput<float>("router_probs", router_probs_dims, router_probs);
@@ -55,6 +56,7 @@ static void RunMoETest(
tester.AddInput<float>("fc1_experts_bias", fc1_experts_bias_dims, fc1_experts_bias);
tester.AddInput<float>("fc2_experts_bias", fc2_experts_bias_dims, fc2_experts_bias);
tester.AddOutput<float>("output", output_dims, output_data);
+ tester.SetOutputTolerance(0.001f);
}

std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
@@ -107,6 +107,7 @@ static void RunPackedMultiHeadAttentionTest(
}

tester.AddOutput<MLFloat16>("output", output_dims, ToFloat16(output_data));
+ tester.SetOutputTolerance(0.005f);
} else {
if (is_packed_qkv) {
tester.AddInput<float>("query", packed_qkv_dims, query_data);
@@ -131,6 +132,7 @@
}

tester.AddOutput<float>("output", output_dims, output_data);
+ tester.SetOutputTolerance(0.001f, 0.001f);
}

std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
2 changes: 2 additions & 0 deletions onnxruntime/test/contrib_ops/quantize_attention_op_test.cc
@@ -90,11 +90,13 @@ void RunQAttention(const std::vector<float>& input_data,
tester.AddInput<MLFloat16>("input_scale", {1}, ToFloat16({input_quant_params.scale}));
tester.AddInput<MLFloat16>("weight_scale", {1}, ToFloat16({weight_quant_params.scale}));
tester.AddOutput<MLFloat16>("output", output_dims, ToFloat16(output_data));
+ tester.SetOutputTolerance(0.01f);
} else {
tester.AddInput<float>("bias", bias_dims, bias_data);
tester.AddInput<float>("input_scale", {1}, {input_quant_params.scale});
tester.AddInput<float>("weight_scale", {1}, {weight_quant_params.scale});
tester.AddOutput<float>("output", output_dims, output_data);
+ tester.SetOutputTolerance(0.005f);
}

if (mask_index_data.size() > 0) {
14 changes: 14 additions & 0 deletions onnxruntime/test/providers/base_tester.cc
@@ -120,6 +120,20 @@ void BaseTester::SetOutputRelErr(const char* name, float v) {
it->validation_params.relative_error = optional<float>(v);
}

+ void BaseTester::SetOutputTolerance(float abs_error, float rel_error) {
+   for (auto& output : output_data_) {
+     if (output.def.Exists()) {
+       if (abs_error >= 0.0f) {
+         output.validation_params.absolute_error = optional<float>(abs_error);
+       }
+
+       if (rel_error >= 0.0f) {
+         output.validation_params.relative_error = optional<float>(rel_error);
+       }
+     }
+   }
+ }
+
std::vector<int64_t> BaseTester::GetDimsForProto(gsl::span<const int64_t> dims) {
std::vector<int64_t> dims_for_proto{dims.begin(), dims.end()};
if (add_symbolic_dim_to_tensor_data_ >= 0 &&
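For context, here is a minimal sketch of how an absolute/relative pair combines into a single bound during output validation. It assumes the rule documented in base_tester.h below (tolerance = absolute + relative * |expected|); the WithinTolerance helper is illustrative and not part of this diff.

#include <cmath>

// Hypothetical helper mirroring the documented tolerance rule (not part of
// this PR): the allowed deviation grows with the magnitude of the expected value.
bool WithinTolerance(float expected, float actual, float abs_error, float rel_error) {
  const float tolerance = abs_error + rel_error * std::fabs(expected);
  return std::fabs(expected - actual) <= tolerance;
}

// Example: with SetOutputTolerance(0.001f, 0.001f), an expected value of 5.0f
// may deviate by up to 0.001f + 0.001f * 5.0f = 0.006f.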
11 changes: 11 additions & 0 deletions onnxruntime/test/providers/base_tester.h
@@ -519,9 +519,20 @@ class BaseTester {
custom_session_registries_.push_back(registry);
}

+ // For floating-point types (double/float/half/bfloat16), the tolerance is:
+ //   abs(expected_value - actual_value) <= absolute + relative * abs(expected_value)
+ // For integer types, tolerance parameters are ignored except in the following cases:
+ //   for uint8, tolerance is only applied to the NNAPI/XNNPACK/DML providers;
+ //   for int8, only absolute is used and relative is ignored. See checkers.cc for details.
+ // If the absolute or relative error is not set, default values are used (search DefaultTolerance for details).
void SetOutputAbsErr(const char* name, float v);
void SetOutputRelErr(const char* name, float v);

+ // Sets the absolute and relative error for all existing outputs.
+ // Negative values are ignored.
+ // Note that this does not set the tolerance for outputs added after this call.
+ void SetOutputTolerance(float abs_error, float rel_error = -1.0f);
+
// Number of times to call InferenceSession::Run. The same feeds are used each time.
// e.g. used to verify the generator ops behave as expected
void SetNumRunCalls(int n) {
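For completeness, a hedged sketch of how a contrib-op test might call the new API; the op name, domain, and data are illustrative, not taken from this diff.

OpTester tester("MyContribOp", 1, onnxruntime::kMSDomain);  // hypothetical op
tester.AddInput<float>("X", {2, 2}, {1.0f, 2.0f, 3.0f, 4.0f});
tester.AddOutput<float>("Y", {2, 2}, {1.0f, 4.0f, 9.0f, 16.0f});

// Applies to "Y" and any other output already added. The default rel_error
// of -1.0f is negative, so the relative tolerance keeps its default value.
tester.SetOutputTolerance(0.001f);
tester.Run();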