From c74409ea8ee1cfbb295d0d1d4a7e8e116c29257f Mon Sep 17 00:00:00 2001 From: Anagha Rao Date: Wed, 24 Jan 2024 22:30:33 -0800 Subject: [PATCH 1/7] Working FP32 tests --- .../matmul_integer_to_float_test.cc | 554 ++++++++++++++++-- 1 file changed, 508 insertions(+), 46 deletions(-) diff --git a/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc b/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc index d1883815c1a6f..53af9ae43eaef 100644 --- a/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc +++ b/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc @@ -32,40 +32,61 @@ void TestMatMulIntegerToFloat(const std::vector& A_dims, bool has_zp = true, bool has_bias = false) { // create rand inputs - RandomValueGenerator random{}; + RandomValueGenerator random{2502124740}; + per_column = reference_model.length() < 0; std::vector A_data; - std::vector tmp_A_data = random.Uniform(A_dims, - std::numeric_limits::lowest(), - std::numeric_limits::max()); - std::transform(tmp_A_data.begin(), tmp_A_data.end(), std::back_inserter(A_data), [](int32_t v) -> WType { + std::vector tmp_A_data = random.Uniform(A_dims, + std::numeric_limits::lowest(), + std::numeric_limits::max()); + std::transform(tmp_A_data.begin(), tmp_A_data.end(), std::back_inserter(A_data), [](int32_t v) -> IType { + //v = 1; return static_cast(v); }); std::vector B_data; - std::vector tmp_B_data = random.Uniform(B_dims, + +//#if defined(USE_DML) +// std::vector tmp_B_data = random.Uniform(B_dims, +// (constexpr(std::is_same_v) ? -2 : 1), +// 5); +//#else + std::vector tmp_B_data = random.Uniform(B_dims, std::numeric_limits::lowest(), std::numeric_limits::max()); +//#endif + std::transform(tmp_B_data.begin(), tmp_B_data.end(), std::back_inserter(B_data), [](int32_t v) -> WType { - return static_cast(v); + //v = 1; + return static_cast(v); }); - std::vector A_scale = random.Uniform(AsSpan({1}), -0.1f, 0.1f); + //std::vector A_scale = random.Uniform(AsSpan({1}), -0.1f, 0.1f); + std::vector A_scale(1, static_cast(1.0f)); std::vector A_zero_point{(std::numeric_limits::lowest() + std::numeric_limits::max() + IType(2)) / 2}; int64_t b_scale_zp_size = per_column ? B_dims.back() : 1; - std::vector B_scale = random.Uniform(AsSpan({b_scale_zp_size}), -0.1f, 0.1f); + //int64_t b_scale_zp_size = B_dims.back(); + std::vector B_scale = random.Uniform(AsSpan({b_scale_zp_size}), static_cast(-0.1f), static_cast(0.1f)); + //std::vector B_scale (b_scale_zp_size, static_cast(1.0f)); + + //std::vector B_zero_point(b_scale_zp_size, 1); std::vector B_zero_point(b_scale_zp_size); - std::for_each(B_zero_point.begin(), - B_zero_point.end(), - [&random](WType& zp) { - zp = static_cast(random.Uniform(std::array{1}, + if (has_zp) { + std::for_each(B_zero_point.begin(), + B_zero_point.end(), + [&random](WType& zp) { + zp = static_cast(random.Uniform(std::array{1}, std::numeric_limits::lowest(), - std::numeric_limits::max())[0]); - }); + std::numeric_limits::max() / 2)[0]); + }); + } else { + B_zero_point = {0}; + } - std::vector Bias = random.Uniform(AsSpan({B_dims.back()}), -0.1f, 0.1f); + //std::vector Bias = random.Uniform(AsSpan({B_dims.back()}), -0.1f, 0.1f); + std::vector Bias(B_dims.back(), static_cast(0.0f)); OpTester test("MatMulIntegerToFloat", 1, onnxruntime::kMSDomain); test.AddInput("A", A_dims, A_data); @@ -77,7 +98,7 @@ void TestMatMulIntegerToFloat(const std::vector& A_dims, test.AddInput("a_zero_point", {1}, A_zero_point); test.AddInput("b_zero_point", {b_scale_zp_size}, B_zero_point); } else { - test.AddOptionalInputEdge(); + test.AddOptionalInputEdge(); test.AddOptionalInputEdge(); } @@ -86,19 +107,68 @@ void TestMatMulIntegerToFloat(const std::vector& A_dims, } else { test.AddOptionalInputEdge(); } - - test.AddReferenceOutputs(reference_model); -#if defined(USE_DML) - if constexpr (std::is_same_v) { - test.SetOutputRelErr("Y", 2e-2f); - } else { - test.SetOutputRelErr("Y", 2.0f); - } -#else - test.SetOutputRelErr("Y", 1e-4f); -#endif - - if constexpr (std::is_same_v) { + int64_t M = 10; + int64_t N = 10; + int64_t K = 10; + std::vector expected_vals(M * N); + + //if (constexpr(std::is_same_v)) + //{ + for (int64_t m = 0; m < M; m++) { + for (int64_t n = 0; n < N; n++) { + float sum = 0.0f; + for (int64_t k = 0; k < K; k++) { + float AIntermediate = has_zp ? (A_data[m * K + k] - A_zero_point[0]) : A_data[m * K + k]; + float BIntermediate = has_zp ? (B_data[k * N + n] - B_zero_point[0]) : B_data[k * N + n]; + sum += (AIntermediate * A_scale[0]) * (BIntermediate * B_scale[0]); + } + if (has_bias) { + // sum += Bias[m * N + n]; + sum += Bias[n]; + } + expected_vals[m * N + n] = static_cast(sum); + } + } + if (constexpr(std::is_same_v)) { + test.AddOutput("Y", {M, N}, expected_vals); + } else { + test.AddOutput("Y", {M, N}, ToFloat16(expected_vals)); + } + //} else { + // MLFloat16 AZP = static_cast(A_zero_point[0]); + // MLFloat16 BZP = static_cast(B_zero_point[0]); + // for (int64_t m = 0; m < M; m++) { + // for (int64_t n = 0; n < N; n++) { + // MLFloat16 sum = static_cast(0.0f); + // for (int64_t k = 0; k < K; k++) { + // MLFloat16 AIntermediate = (has_zp ? (A_data[m * K + k] - AZP) : A_data[m * K + k]); + // MLFloat16 BIntermediate = (has_zp ? (B_data[k * N + n] - BZP) : B_data[k * N + n]); + // sum += (AIntermediate * A_scale[0]) * (BIntermediate * B_scale[0]); + // } + // if (has_bias) { + // // sum += Bias[m * N + n]; + // sum += static_cast(Bias[n]); + // } + // expected_vals[m * N + n] = static_cast(sum); + // } + // } + // test.AddOutput("Y", {M, N}, expected_vals); + //} + + //test.AddReferenceOutputs(reference_model); +//#if defined(USE_DML) +// if constexpr (std::is_same_v) { +// test.SetOutputRelErr("Y", 2e-2f); +// } else { +// //test.SetOutputRelErr("Y", 1.0f); +// test.SetOutputAbsErr("Y", 0.5f); +// //test.SetOutputRelErr("Y", 2e-2f); +// } +//#else +// test.SetOutputRelErr("Y", 1e-4f); +//#endif + + if (constexpr(std::is_same_v) && constexpr(std::is_same_v) && constexpr(std::is_same_v)) { test.Run(); } else { test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCpuExecutionProvider}); @@ -107,9 +177,9 @@ void TestMatMulIntegerToFloat(const std::vector& A_dims, template void RunMatMulIntegerToFloatTest(const string& model_path) { - std::vector A_dims{4, 128}; - std::vector B_dims{128, 128}; - std::vector Y_dims{4, 128}; + std::vector A_dims{10, 10}; + std::vector B_dims{10, 10}; + std::vector Y_dims{10, 10}; TestMatMulIntegerToFloat( A_dims, @@ -153,43 +223,198 @@ void RunMatMulIntegerToFloatTest(const string& model_path) { } #if USE_DML -TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8X8_FP16) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8.onnx"); - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_uint8.onnx"); +//TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8X8_FP16) { +// RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8.onnx"); +// RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_uint8.onnx"); +//} +// +//TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_U8X8_FP16) { +// RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_uint8.onnx"); +//} +// +//TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8X8_FP16) { +// RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_bias.onnx"); +// RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_uint8_bias.onnx"); +//} +// +//TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8X8_FP16) { +// RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_uint8_bias.onnx"); +//} +// +//TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8S8_FP16) { +// RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_int8.onnx"); +//} +// +//TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_S8S8_FP16) { +// RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_int8_bias.onnx"); +//} +// +//TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_S8S8_FP16) { +// RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_int8_bias.onnx"); +//} +// +//TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_S8S8_FP16) { +// RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_int8.onnx"); +//} +#endif // USE_DML + +#if USE_DML + +TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8S8_FP16) { + RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8.onnx"); +} + +TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8S8_FP16) { + RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_bias.onnx"); } -TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8X8_FP16) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_bias.onnx"); - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_uint8_bias.onnx"); +TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_U8S8_FP16) { + RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_uint8.onnx"); +} + +TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8S8_FP16) { + RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_uint8_bias.onnx"); } TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8S8_FP16) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_int8.onnx"); + RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_int8.onnx"); +} + +TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8U8_FP16) { + RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_int8.onnx"); } TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_S8S8_FP16) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_int8_bias.onnx"); + RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_int8_bias.onnx"); } -#endif // USE_DML -TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8X8) { +TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_S8U8_FP16) { + RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_int8_bias.onnx"); +} + +TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_S8S8_FP16) { + RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_int8.onnx"); +} + +TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_S8U8_FP16) { + RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_int8.onnx"); +} + +TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_S8S8_FP16) { + RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_int8_bias.onnx"); +} + +TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_S8U8_FP16) { + RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_int8_bias.onnx"); +} + +#endif + +TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8U8_FP16) { + RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_uint8.onnx"); +} + +TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8U8_FP16) { + RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_uint8_bias.onnx"); +} + +TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_U8U8_FP16) { + RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_uint8.onnx"); +} + +TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8X8_FP16) { + RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_uint8_bias.onnx"); +} + + + + + + + + + + + + + + + + + + + + +#if USE_DML + +TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8S8) { RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8.onnx"); - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_uint8.onnx"); } -TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8X8) { +TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8S8) { RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8_bias.onnx"); - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_uint8_bias.onnx"); +} + +TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_U8S8) { + RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_uint8.onnx"); +} + +TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8S8) { + RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_uint8_bias.onnx"); } TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8S8) { RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8_int8.onnx"); } +TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8U8) { + RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8_int8.onnx"); +} + TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_S8S8) { RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8_int8_bias.onnx"); } +TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_S8U8) { + RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8_int8_bias.onnx"); +} + +TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_S8S8) { + RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8_int8.onnx"); +} + +TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_S8U8) { + RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8_int8.onnx"); +} + +TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_S8S8) { + RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8_int8_bias.onnx"); +} + +TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_S8U8) { + RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8_int8_bias.onnx"); +} + +#endif + +TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8U8) { + RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_uint8.onnx"); +} + + +TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8U8) { + RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_uint8_bias.onnx"); +} + +TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_U8U8) { + RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_uint8.onnx"); +} + + +TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8X8) { + RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_uint8_bias.onnx"); +} + TEST(MatMulIntegerToFloat, MatMulInteger_With_ZeroPoint) { auto test_case = [&](const std::vector& input_shape, const std::vector& weights_shape, @@ -253,5 +478,242 @@ TEST(MatMulIntegerToFloat, MatMulInteger_With_ZeroPoint) { test_case({15, 14, 13}, {15, 13, 27}, {15, 1, 27}); } +TEST(MatMulIntegerToFloat, CustomMatMul) { + OpTester test("MatMulIntegerToFloat", 1, kMSDomain); + int64_t M = 2; + int64_t N = 2; + int64_t K = 2; + + std::vector AMatrix = {1, 1, + 1, 1}; + std::vector BMatrix = {1, 1, + 1, 1}; + test.AddInput("A", {M,K}, AMatrix); + test.AddInput("B", {N,K}, BMatrix); + + test.AddInput("a_scale", {}, {1.0f}); + test.AddInput("b_scale", {}, {1.0f}); + //test.AddInput("a_zero_point", {}, {113}); + + std::vector expected_vals(M * N); + for (int64_t m = 0; m < M; m++) { + for (int64_t n = 0; n < N; n++) { + float sum = 0.0f; + for (int64_t k = 0; k < K; k++) { + sum += AMatrix[m * K + k] * BMatrix[k * N + n]; + } + expected_vals[m * N + n] = sum; + } + } + + test.AddOutput("Y", {M , N}, expected_vals); + + test.Run(); +} + +TEST(MatMulIntegerToFloat, CustomZPMatMul) { + OpTester test("MatMulIntegerToFloat", 1, kMSDomain); + int64_t M = 2; + int64_t N = 2; + int64_t K = 2; + + std::vector AMatrix = {1, 1, + 1, 1}; + std::vector BMatrix = {1, -1, + 1, 1}; + float AScale = 1.0f; + float BScale = 1.0f; + + uint8_t AZP = 113; + int8_t BZP = -16; + + test.AddInput("A", {M, K}, AMatrix); + test.AddInput("B", {N, K}, BMatrix); + + test.AddInput("a_scale", {}, {AScale}); + test.AddInput("b_scale", {}, {BScale}); + test.AddInput("a_zero_point", {}, {AZP}); + test.AddInput("b_zero_point", {}, {BZP}); + + std::vector expected_vals(M * N); + for (int64_t m = 0; m < M; m++) { + for (int64_t n = 0; n < N; n++) { + float sum = 0.0f; + for (int64_t k = 0; k < K; k++) { + sum += ((AMatrix[m * K + k] - AZP) * AScale) * ((BMatrix[k * N + n] - BZP) * BScale); + } + expected_vals[m * N + n] = sum; + } + } + + test.AddOutput("Y", {M, N}, expected_vals); + + test.Run(); +} + +TEST(MatMulIntegerToFloat, CustomScaleMatMul) { + OpTester test("MatMulIntegerToFloat", 1, kMSDomain); + int64_t M = 2; + int64_t N = 2; + int64_t K = 2; + + std::vector AMatrix = {1, 1, + 1, 1}; + std::vector BMatrix = {1, 1, + 1, 1}; + float AScale = 0.910f; + float BScale = 1.10f; + + uint8_t AZP = 1; + uint8_t BZP= 1; + + test.AddInput("A", {M, K}, AMatrix); + test.AddInput("B", {N, K}, BMatrix); + + test.AddInput("a_scale", {}, {AScale}); + test.AddInput("b_scale", {}, {BScale}); + test.AddInput("a_zero_point", {}, {AZP}); + test.AddInput("b_zero_point", {}, {BZP}); + + std::vector expected_vals(M * N); + for (int64_t m = 0; m < M; m++) { + for (int64_t n = 0; n < N; n++) { + float sum = 0.0f; + for (int64_t k = 0; k < K; k++) { + sum += ((AMatrix[m * K + k] - AZP) * AScale) * ((BMatrix[k * N + n] - BZP) * BScale); + } + expected_vals[m * N + n] = sum; + } + } + + test.AddOutput("Y", {M, N}, expected_vals); + + test.Run(); +} + +TEST(MatMulIntegerToFloat, CustomMatMul1) { + OpTester test("MatMulIntegerToFloat", 1, kMSDomain); + int64_t M = 2; + int64_t N = 2; + int64_t K = 2; + + std::vector AMatrix = {11, -2, + -1, 3}; + std::vector BMatrix = {-13, -2, + -1, 23}; + float AScale = 0.910f; + float BScale = 1.10f; + + int8_t AZP = 113; + int8_t BZP = 98; + + test.AddInput("A", {M, K}, AMatrix); + test.AddInput("B", {N, K}, BMatrix); + + test.AddInput("a_scale", {}, {AScale}); + test.AddInput("b_scale", {}, {BScale}); + test.AddInput("a_zero_point", {}, {AZP}); + test.AddInput("b_zero_point", {}, {BZP}); + + std::vector expected_vals(M * N); + for (int64_t m = 0; m < M; m++) { + for (int64_t n = 0; n < N; n++) { + float sum = 0.0f; + for (int64_t k = 0; k < K; k++) { + sum += ((AMatrix[m * K + k] - AZP) * AScale) * ((BMatrix[k * N + n] - BZP) * BScale); + } + expected_vals[m * N + n] = sum; + } + } + + test.AddOutput("Y", {M, N}, expected_vals); + + test.Run(); +} + +TEST(MatMulIntegerToFloat, CustomMatMul2) { + OpTester test("MatMulIntegerToFloat", 1, kMSDomain); + int64_t M = 2; + int64_t N = 2; + int64_t K = 2; + + std::vector AMatrix = {11, -2, + -1, 3}; + std::vector BMatrix = {-13, -2, + -1, 23}; + float AScale = 0.910f; + std::vector BScale = {1.10f, 1.123f}; + + int8_t AZP = 113; + std::vector BZP = {98, 71}; + + test.AddInput("A", {M, K}, AMatrix); + test.AddInput("B", {K, N}, BMatrix); + + test.AddInput("a_scale", {}, {AScale}); + test.AddInput("b_scale", {N}, BScale); + test.AddInput("a_zero_point", {}, {AZP}); + test.AddInput("b_zero_point", {N}, BZP); + + std::vector expected_vals(M * N); + for (int64_t m = 0; m < M; m++) { + for (int64_t n = 0; n < N; n++) { + float sum = 0.0f; + for (int64_t k = 0; k < K; k++) { + sum += ((AMatrix[m * K + k] - AZP) * AScale) * ((BMatrix[k * N + n] - BZP[n]) * BScale[n]); + } + expected_vals[m * N + n] = sum; + } + } + + test.AddOutput("Y", {M, N}, expected_vals); + + test.Run(); +} + +TEST(MatMulIntegerToFloat, CustomBiasMatMul) { + OpTester test("MatMulIntegerToFloat", 1, kMSDomain); + int64_t M = 2; + int64_t N = 2; + int64_t K = 3; + + std::vector AMatrix = {11, -2, 5, + -1, 3, 10}; + std::vector BMatrix = {-13, -2, + 9, 55, + -1, 23}; + float AScale = 0.910f; + std::vector BScale = {1.10f, 1.123f}; + + int8_t AZP = 113; + std::vector BZP = {98, 71}; + + std::vector Bias = {0.10f, 1.123f}; + + test.AddInput("A", {M, K}, AMatrix); + test.AddInput("B", {K, N}, BMatrix); + + test.AddInput("a_scale", {}, {AScale}); + test.AddInput("b_scale", {N}, BScale); + test.AddInput("a_zero_point", {}, {AZP}); + test.AddInput("b_zero_point", {N}, BZP); + test.AddInput("bias", {N}, Bias); + + std::vector expected_vals(M * N); + for (int64_t m = 0; m < M; m++) { + for (int64_t n = 0; n < N; n++) { + float sum = 0.0f; + for (int64_t k = 0; k < K; k++) { + sum += ((AMatrix[m * K + k] - AZP) * AScale) * ((BMatrix[k * N + n] - BZP[n]) * BScale[n]); + } + expected_vals[m * N + n] = sum + Bias[n]; + } + } + + test.AddOutput("Y", {M, N}, expected_vals); + + test.Run(); +} + } // namespace test } // namespace onnxruntime From 23cce714a38d91cb7f48e4958da3d8d9b5e5e0ba Mon Sep 17 00:00:00 2001 From: Anagha Rao Date: Mon, 29 Jan 2024 22:40:33 -0800 Subject: [PATCH 2/7] Working tests all --- .../src/External/DirectMLHelpers/ApiTraits.h | 2 +- .../dml/OperatorAuthorHelper/OperatorHelper.h | 10 +- .../matmul_integer_to_float_test.cc | 713 ++++++------------ .../test/testdata/matmul_integer_to_float.py | 30 +- .../matmul_integer_to_float16_int8.onnx | 51 -- .../matmul_integer_to_float16_int8_bias.onnx | 49 -- .../matmul_integer_to_float16_int8_int8.onnx | 51 -- ...mul_integer_to_float16_int8_int8_bias.onnx | 49 -- .../matmul_integer_to_float16_int8.onnx | 51 -- 9 files changed, 240 insertions(+), 766 deletions(-) delete mode 100644 onnxruntime/test/testdata/matmul_integer_to_float16_int8.onnx delete mode 100644 onnxruntime/test/testdata/matmul_integer_to_float16_int8_bias.onnx delete mode 100644 onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8.onnx delete mode 100644 onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8_bias.onnx delete mode 100644 onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float16_int8.onnx diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/External/DirectMLHelpers/ApiTraits.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/External/DirectMLHelpers/ApiTraits.h index a7a6e59e400ef..3c0f49f3d2d49 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/External/DirectMLHelpers/ApiTraits.h +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/External/DirectMLHelpers/ApiTraits.h @@ -882,7 +882,7 @@ struct OperatorDescTraits template <> struct OperatorDescTraits { - static constexpr DML_OPERATOR_TYPE Type = (DML_OPERATOR_TYPE) DML_OPERATOR_MATRIX_MULTIPLY_INTEGER_TO_FLOAT; + static constexpr DML_OPERATOR_TYPE Type = DML_OPERATOR_MATRIX_MULTIPLY_INTEGER_TO_FLOAT; }; template <> diff --git a/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h b/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h index 7146edd861257..d5a66a74237ee 100644 --- a/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h +++ b/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h @@ -826,14 +826,6 @@ class QLinearMatMulHelper : public MatMulHelperBase QLinearMatMulHelper(const Info_t& info, const Shape_t& shape) : MatMulHelperBase(info, shape, 0, 3) {} }; -class MatMulIntegerToFloatHelper : public MatMulHelperBase -{ -public: - template - MatMulIntegerToFloatHelper(const Info_t& info, const Shape_t& shape) : MatMulHelperBase(info, shape, 0, 1) {} -}; - - class TopKHelper { void Initialize( @@ -1752,7 +1744,7 @@ using ShapeInferenceHelper_Identity16 = GetOutputShapeAsInputShapeHelper; using ShapeInferenceHelper_MatMul = MatMulHelper; using ShapeInferenceHelper_MatMulInteger = MatMulHelper; using ShapeInferenceHelper_DynamicQuantizeMatMul = MatMulHelper; -using ShapeInferenceHelper_MatMulIntegerToFloat = MatMulIntegerToFloatHelper; +using ShapeInferenceHelper_MatMulIntegerToFloat = MatMulHelper; using ShapeInferenceHelper_QLinearMatMul = QLinearMatMulHelper; using ShapeInferenceHelper_QLinearAdd = GetBroadcastedOutputShapeHelper; using ShapeInferenceHelper_DynamicQuantizeLinear = GetOutputShapeAsInputShapeHelper; diff --git a/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc b/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc index 53af9ae43eaef..49560b8ff268a 100644 --- a/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc +++ b/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc @@ -24,69 +24,78 @@ namespace onnxruntime { namespace test { template -void TestMatMulIntegerToFloat(const std::vector& A_dims, - std::vector B_dims, - const std::string& reference_model, - bool is_matrix_b_constant, +static void CalculateMatMulIntegerToFloat(const int64_t M, const int64_t N, const int64_t K, const std::vector& A_data, const std::vector& A_scale, const std::vector& A_zero_point, const std::vector& B_data, std::vector& B_scale, std::vector& B_zero_point, const + std::vector& Bias, std::vector& Y_data, bool per_column, bool has_zp, bool has_bias) { + + if (!per_column) { + B_zero_point.resize(N, B_zero_point[0]); + B_scale.resize(N, B_scale[0]); + } + + for (int64_t m = 0; m < M; m++) { + for (int64_t n = 0; n < N; n++) { + float sum = 0.0f; + for (int64_t k = 0; k < K; k++) { + float A_dequantized = has_zp ? (A_data[m * K + k] - A_zero_point[0]) * A_scale[0] : A_data[m * K + k] * A_scale[0]; + float B_dequantized = has_zp ? (B_data[k * N + n] - B_zero_point[n]) * B_scale[n] : B_data[k * N + n] * B_scale[n]; + + sum += A_dequantized * B_dequantized; + } + if (has_bias) { + sum += Bias[n]; + } + Y_data[m * N + n] = static_cast(sum); + } + } +} + +template +void TestMatMulIntegerToFloat(bool is_matrix_b_constant, bool per_column = false, bool has_zp = true, bool has_bias = false) { // create rand inputs - RandomValueGenerator random{2502124740}; - per_column = reference_model.length() < 0; - + RandomValueGenerator random{}; + int64_t M = 4; + int64_t N = 128; + int64_t K = 128; + std::vector A_dims{M, K}; + std::vector B_dims{K, N}; + std::vector Y_dims{M, K}; std::vector A_data; std::vector tmp_A_data = random.Uniform(A_dims, std::numeric_limits::lowest(), std::numeric_limits::max()); std::transform(tmp_A_data.begin(), tmp_A_data.end(), std::back_inserter(A_data), [](int32_t v) -> IType { - //v = 1; return static_cast(v); }); std::vector B_data; -//#if defined(USE_DML) -// std::vector tmp_B_data = random.Uniform(B_dims, -// (constexpr(std::is_same_v) ? -2 : 1), -// 5); -//#else std::vector tmp_B_data = random.Uniform(B_dims, std::numeric_limits::lowest(), std::numeric_limits::max()); -//#endif std::transform(tmp_B_data.begin(), tmp_B_data.end(), std::back_inserter(B_data), [](int32_t v) -> WType { - //v = 1; return static_cast(v); }); - //std::vector A_scale = random.Uniform(AsSpan({1}), -0.1f, 0.1f); - std::vector A_scale(1, static_cast(1.0f)); + std::vector A_scale = random.Uniform(AsSpan({1}), -0.1f, 0.1f); std::vector A_zero_point{(std::numeric_limits::lowest() + std::numeric_limits::max() + IType(2)) / 2}; int64_t b_scale_zp_size = per_column ? B_dims.back() : 1; - //int64_t b_scale_zp_size = B_dims.back(); - std::vector B_scale = random.Uniform(AsSpan({b_scale_zp_size}), static_cast(-0.1f), static_cast(0.1f)); - //std::vector B_scale (b_scale_zp_size, static_cast(1.0f)); - - //std::vector B_zero_point(b_scale_zp_size, 1); + std::vector B_scale = random.Uniform(AsSpan({b_scale_zp_size}), -0.1f, 0.1f); std::vector B_zero_point(b_scale_zp_size); - if (has_zp) { - std::for_each(B_zero_point.begin(), - B_zero_point.end(), - [&random](WType& zp) { - zp = static_cast(random.Uniform(std::array{1}, - std::numeric_limits::lowest(), - std::numeric_limits::max() / 2)[0]); - }); - } else { - B_zero_point = {0}; - } + std::for_each(B_zero_point.begin(), + B_zero_point.end(), + [&random](WType& zp) { + zp = static_cast(random.Uniform(std::array{1}, + std::numeric_limits::lowest(), + std::numeric_limits::max())[0]); + }); - //std::vector Bias = random.Uniform(AsSpan({B_dims.back()}), -0.1f, 0.1f); - std::vector Bias(B_dims.back(), static_cast(0.0f)); + std::vector Bias = random.Uniform(AsSpan({B_dims.back()}), -0.1f, 0.1f); OpTester test("MatMulIntegerToFloat", 1, onnxruntime::kMSDomain); test.AddInput("A", A_dims, A_data); @@ -107,84 +116,33 @@ void TestMatMulIntegerToFloat(const std::vector& A_dims, } else { test.AddOptionalInputEdge(); } - int64_t M = 10; - int64_t N = 10; - int64_t K = 10; - std::vector expected_vals(M * N); - - //if (constexpr(std::is_same_v)) - //{ - for (int64_t m = 0; m < M; m++) { - for (int64_t n = 0; n < N; n++) { - float sum = 0.0f; - for (int64_t k = 0; k < K; k++) { - float AIntermediate = has_zp ? (A_data[m * K + k] - A_zero_point[0]) : A_data[m * K + k]; - float BIntermediate = has_zp ? (B_data[k * N + n] - B_zero_point[0]) : B_data[k * N + n]; - sum += (AIntermediate * A_scale[0]) * (BIntermediate * B_scale[0]); - } - if (has_bias) { - // sum += Bias[m * N + n]; - sum += Bias[n]; - } - expected_vals[m * N + n] = static_cast(sum); - } - } - if (constexpr(std::is_same_v)) { - test.AddOutput("Y", {M, N}, expected_vals); + + std::vector Y_data(M * N); + CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, B_data, B_scale, B_zero_point, Bias, Y_data, per_column, has_zp, has_bias); + + if ( constexpr(std::is_same_v)) { + test.AddOutput("Y", {M, N}, Y_data); } else { - test.AddOutput("Y", {M, N}, ToFloat16(expected_vals)); + test.AddOutput("Y", {M, N}, ToFloat16(Y_data)); + test.SetOutputAbsErr("Y", 0.5f); } - //} else { - // MLFloat16 AZP = static_cast(A_zero_point[0]); - // MLFloat16 BZP = static_cast(B_zero_point[0]); - // for (int64_t m = 0; m < M; m++) { - // for (int64_t n = 0; n < N; n++) { - // MLFloat16 sum = static_cast(0.0f); - // for (int64_t k = 0; k < K; k++) { - // MLFloat16 AIntermediate = (has_zp ? (A_data[m * K + k] - AZP) : A_data[m * K + k]); - // MLFloat16 BIntermediate = (has_zp ? (B_data[k * N + n] - BZP) : B_data[k * N + n]); - // sum += (AIntermediate * A_scale[0]) * (BIntermediate * B_scale[0]); - // } - // if (has_bias) { - // // sum += Bias[m * N + n]; - // sum += static_cast(Bias[n]); - // } - // expected_vals[m * N + n] = static_cast(sum); - // } - // } - // test.AddOutput("Y", {M, N}, expected_vals); - //} - - //test.AddReferenceOutputs(reference_model); -//#if defined(USE_DML) -// if constexpr (std::is_same_v) { -// test.SetOutputRelErr("Y", 2e-2f); -// } else { -// //test.SetOutputRelErr("Y", 1.0f); -// test.SetOutputAbsErr("Y", 0.5f); -// //test.SetOutputRelErr("Y", 2e-2f); -// } -//#else -// test.SetOutputRelErr("Y", 1e-4f); -//#endif - - if (constexpr(std::is_same_v) && constexpr(std::is_same_v) && constexpr(std::is_same_v)) { + + // Only DML EP supports these data type combinations for now + if ((constexpr(std::is_same_v)) || + (constexpr(std::is_same_v) && + /*(constexpr(std::is_same_v) &&*/ !constexpr(std::is_same_v)) + ) { + std::vector> execution_providers; + execution_providers.push_back(DefaultDmlExecutionProvider()); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); + } else { test.Run(); - } else { - test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCpuExecutionProvider}); } } template -void RunMatMulIntegerToFloatTest(const string& model_path) { - std::vector A_dims{10, 10}; - std::vector B_dims{10, 10}; - std::vector Y_dims{10, 10}; - +void RunMatMulIntegerToFloatTest() { TestMatMulIntegerToFloat( - A_dims, - B_dims, - model_path, false, /*is_matrix_b_constant*/ false, /*per_column*/ HasZeroPoint, /*has_zp*/ @@ -192,9 +150,6 @@ void RunMatMulIntegerToFloatTest(const string& model_path) { ); TestMatMulIntegerToFloat( - A_dims, - B_dims, - model_path, true, /*is_matrix_b_constant*/ false, /*per_column*/ HasZeroPoint, /*has_zp*/ @@ -202,9 +157,6 @@ void RunMatMulIntegerToFloatTest(const string& model_path) { ); TestMatMulIntegerToFloat( - A_dims, - B_dims, - model_path, false, /*is_matrix_b_constant*/ true, /*per_column*/ HasZeroPoint, /*has_zp*/ @@ -212,9 +164,6 @@ void RunMatMulIntegerToFloatTest(const string& model_path) { ); TestMatMulIntegerToFloat( - A_dims, - B_dims, - model_path, true, /*is_matrix_b_constant*/ true, /*per_column*/ HasZeroPoint, /*has_zp*/ @@ -222,198 +171,247 @@ void RunMatMulIntegerToFloatTest(const string& model_path) { ); } -#if USE_DML -//TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8X8_FP16) { -// RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8.onnx"); -// RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_uint8.onnx"); -//} -// -//TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_U8X8_FP16) { -// RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_uint8.onnx"); -//} -// -//TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8X8_FP16) { -// RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_bias.onnx"); -// RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_uint8_bias.onnx"); -//} -// -//TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8X8_FP16) { -// RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_uint8_bias.onnx"); -//} -// -//TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8S8_FP16) { -// RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_int8.onnx"); -//} -// -//TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_S8S8_FP16) { -// RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_int8_bias.onnx"); -//} -// -//TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_S8S8_FP16) { -// RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_int8_bias.onnx"); -//} -// -//TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_S8S8_FP16) { -// RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_int8.onnx"); -//} -#endif // USE_DML - -#if USE_DML - -TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8S8_FP16) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8.onnx"); +TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8S8) { + RunMatMulIntegerToFloatTest(); } -TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8S8_FP16) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_bias.onnx"); +TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_S8S8) { + RunMatMulIntegerToFloatTest(); } -TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_U8S8_FP16) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_uint8.onnx"); +TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_S8S8) { + RunMatMulIntegerToFloatTest(); } -TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8S8_FP16) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_uint8_bias.onnx"); +TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_S8S8) { + RunMatMulIntegerToFloatTest(); } -TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8S8_FP16) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_int8.onnx"); +TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8U8) { + RunMatMulIntegerToFloatTest(); } -TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8U8_FP16) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_int8.onnx"); -} -TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_S8S8_FP16) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_int8_bias.onnx"); +TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8U8) { + RunMatMulIntegerToFloatTest(); } -TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_S8U8_FP16) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_int8_bias.onnx"); +TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_U8U8) { + RunMatMulIntegerToFloatTest(); } -TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_S8S8_FP16) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_int8.onnx"); -} -TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_S8U8_FP16) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_int8.onnx"); +TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8X8) { + RunMatMulIntegerToFloatTest(); } -TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_S8S8_FP16) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_int8_bias.onnx"); -} +// DML EP supports Float16 output type and A Matrix and B Matric of different data types for Float32 output +#if defined(USE_DML) -TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_S8U8_FP16) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_int8_bias.onnx"); +TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8U8) { + RunMatMulIntegerToFloatTest(); } -#endif - -TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8U8_FP16) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_uint8.onnx"); +TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_S8U8) { + RunMatMulIntegerToFloatTest(); } -TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8U8_FP16) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_uint8_bias.onnx"); +TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_S8U8) { + RunMatMulIntegerToFloatTest(); } -TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_U8U8_FP16) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_uint8.onnx"); +TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_S8U8) { + RunMatMulIntegerToFloatTest(); } -TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8X8_FP16) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_uint8_bias.onnx"); +TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8S8) { + RunMatMulIntegerToFloatTest(); } +TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8S8) { + RunMatMulIntegerToFloatTest(); +} +TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_U8S8) { + RunMatMulIntegerToFloatTest(); +} +TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8S8) { + RunMatMulIntegerToFloatTest(); +} +TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_U8U8) { + OpTester test("MatMulIntegerToFloat", 1, kMSDomain); + int64_t M = 5; + int64_t N = 5; + int64_t K = 2; + std::vector A_data = {1, 5, 2, 1, 9, + 1, 1, 3, 7, 2}; + std::vector B_data = {3, 7, 2, 1, 1, + 2, 1, 9, 1, 1}; + std::vector A_scale = ToFloat16({3.0f}); + std::vector B_scale = ToFloat16({2.0f}); + test.AddInput("A", {M, K}, A_data); + test.AddInput("B", {K, N}, B_data); + std::vector A_zero_point = {3}; + std::vector B_zero_point = {5}; + test.AddInput("a_scale", {1}, A_scale); + test.AddInput("b_scale", {1}, B_scale); + test.AddInput("a_zero_point", {1}, A_zero_point); + test.AddInput("b_zero_point", {1}, B_zero_point); + std::vector Y_data(M * N); + CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, B_data, B_scale, B_zero_point, {}, Y_data, false, true, false); + test.AddOutput("Y", {M, N}, ToFloat16(Y_data)); + std::vector> execution_providers; + execution_providers.push_back(DefaultDmlExecutionProvider()); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); +} +TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_U8S8) { + OpTester test("MatMulIntegerToFloat", 1, kMSDomain); + int64_t M = 5; + int64_t N = 5; + int64_t K = 2; + std::vector A_data = {3, 7, 2, 1, 1, + 2, 1, 9, 1, 1}; + std::vector B_data = {2, -1, -9, 1, 1, + -1, 0, -3, 1, -4}; + std::vector A_scale = ToFloat16({-4.0f}); + std::vector B_scale = ToFloat16({2.0f}); + test.AddInput("A", {M, K}, A_data); + test.AddInput("B", {K, N}, B_data); + std::vector A_zero_point = {1}; + std::vector B_zero_point = {3}; + std::vector Bias = ToFloat16({11.0f, -17.0f, 1.0f, -3.0f, 12.0f}); + test.AddInput("a_scale", {1}, A_scale); + test.AddInput("b_scale", {1}, B_scale); + test.AddInput("a_zero_point", {1}, A_zero_point); + test.AddInput("b_zero_point", {1}, B_zero_point); + std::vector Y_data(M * N); + CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, B_data, B_scale, B_zero_point, {}, Y_data, false, true, false); + test.AddOutput("Y", {M, N}, ToFloat16(Y_data)); + std::vector> execution_providers; + execution_providers.push_back(DefaultDmlExecutionProvider()); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); +} +TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_S8S8) { + OpTester test("MatMulIntegerToFloat", 1, kMSDomain); + int64_t M = 5; + int64_t N = 5; + int64_t K = 2; + std::vector A_data = {3, 7, -2, 1, 1, + 2, -1, -9, 1, 1}; + std::vector B_data = {2, -1, -9, 1, 1, + -1, 0, -3, 1, -4}; + std::vector A_scale = ToFloat16({-4.0f}); + std::vector B_scale = ToFloat16({2.0f}); + test.AddInput("A", {M, K}, A_data); + test.AddInput("B", {K, N}, B_data); + std::vector A_zero_point = {-1}; + std::vector B_zero_point = {3}; + std::vector Bias = ToFloat16({11.0f, -17.0f, 1.0f, -3.0f, 12.0f}); + test.AddInput("a_scale", {1}, A_scale); + test.AddInput("b_scale", {1}, B_scale); + test.AddInput("a_zero_point", {1}, A_zero_point); + test.AddInput("b_zero_point", {1}, B_zero_point); + test.AddInput("bias", {N}, Bias); -#if USE_DML - -TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8S8) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8.onnx"); -} - -TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8S8) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8_bias.onnx"); -} - -TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_U8S8) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_uint8.onnx"); -} + std::vector Y_data(M * N); + CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, B_data, B_scale, B_zero_point, Bias, Y_data, false, true, true); -TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8S8) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_uint8_bias.onnx"); -} + test.AddOutput("Y", {M, N}, ToFloat16(Y_data)); -TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8S8) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8_int8.onnx"); + std::vector> execution_providers; + execution_providers.push_back(DefaultDmlExecutionProvider()); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); } -TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8U8) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8_int8.onnx"); -} +TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_S8U8) { + OpTester test("MatMulIntegerToFloat", 1, kMSDomain); + int64_t M = 5; + int64_t N = 5; + int64_t K = 2; -TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_S8S8) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8_int8_bias.onnx"); -} + std::vector A_data = {3, 7, -2, 1, 1, + 2, -1, -9, 1, 1}; + std::vector B_data = {3, 7, 2, 1, 1, + 2, 1, 9, 1, 1}; + std::vector A_scale = ToFloat16({-4.0f}); + std::vector B_scale = ToFloat16({2.0f}); + test.AddInput("A", {M, K}, A_data); + test.AddInput("B", {K, N}, B_data); + std::vector A_zero_point = {-1}; + std::vector B_zero_point = {3}; + std::vector Bias = ToFloat16({11.0f, -17.0f, 1.0f, -3.0f, 12.0f}); -TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_S8U8) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8_int8_bias.onnx"); -} + test.AddInput("a_scale", {1}, A_scale); + test.AddInput("b_scale", {1}, B_scale); + test.AddInput("a_zero_point", {1}, A_zero_point); + test.AddInput("b_zero_point", {1}, B_zero_point); + test.AddInput("bias", {N}, Bias); -TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_S8S8) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8_int8.onnx"); -} + std::vector Y_data(M * N); + CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, B_data, B_scale, B_zero_point, Bias, Y_data, false, true, true); -TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_S8U8) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8_int8.onnx"); -} + test.AddOutput("Y", {M, N}, ToFloat16(Y_data)); -TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_S8S8) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8_int8_bias.onnx"); + std::vector> execution_providers; + execution_providers.push_back(DefaultDmlExecutionProvider()); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); } -TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_S8U8) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8_int8_bias.onnx"); -} +TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16) { + OpTester test("MatMulIntegerToFloat", 1, kMSDomain); + int64_t M = 2; + int64_t N = 2; + int64_t K = 3; -#endif + std::vector A_data = {11, -2, 5, + -1, 3, 10}; + std::vector B_data = {-13, -2, + 9, 55, + -1, 23}; + std::vector A_scale = ToFloat16({0.910f}); + std::vector B_scale = ToFloat16({1.10f, 1.123f}); -TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8U8) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_uint8.onnx"); -} + std::vector A_zero_point = {113}; + std::vector B_zero_point = {98, 71}; + std::vector Bias = ToFloat16({0.10f, 1.123f}); -TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8U8) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_uint8_bias.onnx"); -} + test.AddInput("A", {M, K}, A_data); + test.AddInput("B", {K, N}, B_data); -TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_U8U8) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_uint8.onnx"); -} + test.AddInput("a_scale", {}, {A_scale}); + test.AddInput("b_scale", {N}, B_scale); + test.AddInput("a_zero_point", {}, {A_zero_point}); + test.AddInput("b_zero_point", {N}, B_zero_point); + test.AddInput("bias", {N}, Bias); + std::vector Y_data(M * N); + CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, B_data, B_scale, B_zero_point, Bias, Y_data, true, true, true); -TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8X8) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_uint8_bias.onnx"); + test.AddOutput("Y", {M, N}, ToFloat16(Y_data)); + test.SetOutputRelErr("Y", 2e-2f); + std::vector> execution_providers; + execution_providers.push_back(DefaultDmlExecutionProvider()); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); } +#endif TEST(MatMulIntegerToFloat, MatMulInteger_With_ZeroPoint) { auto test_case = [&](const std::vector& input_shape, @@ -478,242 +476,5 @@ TEST(MatMulIntegerToFloat, MatMulInteger_With_ZeroPoint) { test_case({15, 14, 13}, {15, 13, 27}, {15, 1, 27}); } -TEST(MatMulIntegerToFloat, CustomMatMul) { - OpTester test("MatMulIntegerToFloat", 1, kMSDomain); - int64_t M = 2; - int64_t N = 2; - int64_t K = 2; - - std::vector AMatrix = {1, 1, - 1, 1}; - std::vector BMatrix = {1, 1, - 1, 1}; - test.AddInput("A", {M,K}, AMatrix); - test.AddInput("B", {N,K}, BMatrix); - - test.AddInput("a_scale", {}, {1.0f}); - test.AddInput("b_scale", {}, {1.0f}); - //test.AddInput("a_zero_point", {}, {113}); - - std::vector expected_vals(M * N); - for (int64_t m = 0; m < M; m++) { - for (int64_t n = 0; n < N; n++) { - float sum = 0.0f; - for (int64_t k = 0; k < K; k++) { - sum += AMatrix[m * K + k] * BMatrix[k * N + n]; - } - expected_vals[m * N + n] = sum; - } - } - - test.AddOutput("Y", {M , N}, expected_vals); - - test.Run(); -} - -TEST(MatMulIntegerToFloat, CustomZPMatMul) { - OpTester test("MatMulIntegerToFloat", 1, kMSDomain); - int64_t M = 2; - int64_t N = 2; - int64_t K = 2; - - std::vector AMatrix = {1, 1, - 1, 1}; - std::vector BMatrix = {1, -1, - 1, 1}; - float AScale = 1.0f; - float BScale = 1.0f; - - uint8_t AZP = 113; - int8_t BZP = -16; - - test.AddInput("A", {M, K}, AMatrix); - test.AddInput("B", {N, K}, BMatrix); - - test.AddInput("a_scale", {}, {AScale}); - test.AddInput("b_scale", {}, {BScale}); - test.AddInput("a_zero_point", {}, {AZP}); - test.AddInput("b_zero_point", {}, {BZP}); - - std::vector expected_vals(M * N); - for (int64_t m = 0; m < M; m++) { - for (int64_t n = 0; n < N; n++) { - float sum = 0.0f; - for (int64_t k = 0; k < K; k++) { - sum += ((AMatrix[m * K + k] - AZP) * AScale) * ((BMatrix[k * N + n] - BZP) * BScale); - } - expected_vals[m * N + n] = sum; - } - } - - test.AddOutput("Y", {M, N}, expected_vals); - - test.Run(); -} - -TEST(MatMulIntegerToFloat, CustomScaleMatMul) { - OpTester test("MatMulIntegerToFloat", 1, kMSDomain); - int64_t M = 2; - int64_t N = 2; - int64_t K = 2; - - std::vector AMatrix = {1, 1, - 1, 1}; - std::vector BMatrix = {1, 1, - 1, 1}; - float AScale = 0.910f; - float BScale = 1.10f; - - uint8_t AZP = 1; - uint8_t BZP= 1; - - test.AddInput("A", {M, K}, AMatrix); - test.AddInput("B", {N, K}, BMatrix); - - test.AddInput("a_scale", {}, {AScale}); - test.AddInput("b_scale", {}, {BScale}); - test.AddInput("a_zero_point", {}, {AZP}); - test.AddInput("b_zero_point", {}, {BZP}); - - std::vector expected_vals(M * N); - for (int64_t m = 0; m < M; m++) { - for (int64_t n = 0; n < N; n++) { - float sum = 0.0f; - for (int64_t k = 0; k < K; k++) { - sum += ((AMatrix[m * K + k] - AZP) * AScale) * ((BMatrix[k * N + n] - BZP) * BScale); - } - expected_vals[m * N + n] = sum; - } - } - - test.AddOutput("Y", {M, N}, expected_vals); - - test.Run(); -} - -TEST(MatMulIntegerToFloat, CustomMatMul1) { - OpTester test("MatMulIntegerToFloat", 1, kMSDomain); - int64_t M = 2; - int64_t N = 2; - int64_t K = 2; - - std::vector AMatrix = {11, -2, - -1, 3}; - std::vector BMatrix = {-13, -2, - -1, 23}; - float AScale = 0.910f; - float BScale = 1.10f; - - int8_t AZP = 113; - int8_t BZP = 98; - - test.AddInput("A", {M, K}, AMatrix); - test.AddInput("B", {N, K}, BMatrix); - - test.AddInput("a_scale", {}, {AScale}); - test.AddInput("b_scale", {}, {BScale}); - test.AddInput("a_zero_point", {}, {AZP}); - test.AddInput("b_zero_point", {}, {BZP}); - - std::vector expected_vals(M * N); - for (int64_t m = 0; m < M; m++) { - for (int64_t n = 0; n < N; n++) { - float sum = 0.0f; - for (int64_t k = 0; k < K; k++) { - sum += ((AMatrix[m * K + k] - AZP) * AScale) * ((BMatrix[k * N + n] - BZP) * BScale); - } - expected_vals[m * N + n] = sum; - } - } - - test.AddOutput("Y", {M, N}, expected_vals); - - test.Run(); -} - -TEST(MatMulIntegerToFloat, CustomMatMul2) { - OpTester test("MatMulIntegerToFloat", 1, kMSDomain); - int64_t M = 2; - int64_t N = 2; - int64_t K = 2; - - std::vector AMatrix = {11, -2, - -1, 3}; - std::vector BMatrix = {-13, -2, - -1, 23}; - float AScale = 0.910f; - std::vector BScale = {1.10f, 1.123f}; - - int8_t AZP = 113; - std::vector BZP = {98, 71}; - - test.AddInput("A", {M, K}, AMatrix); - test.AddInput("B", {K, N}, BMatrix); - - test.AddInput("a_scale", {}, {AScale}); - test.AddInput("b_scale", {N}, BScale); - test.AddInput("a_zero_point", {}, {AZP}); - test.AddInput("b_zero_point", {N}, BZP); - - std::vector expected_vals(M * N); - for (int64_t m = 0; m < M; m++) { - for (int64_t n = 0; n < N; n++) { - float sum = 0.0f; - for (int64_t k = 0; k < K; k++) { - sum += ((AMatrix[m * K + k] - AZP) * AScale) * ((BMatrix[k * N + n] - BZP[n]) * BScale[n]); - } - expected_vals[m * N + n] = sum; - } - } - - test.AddOutput("Y", {M, N}, expected_vals); - - test.Run(); -} - -TEST(MatMulIntegerToFloat, CustomBiasMatMul) { - OpTester test("MatMulIntegerToFloat", 1, kMSDomain); - int64_t M = 2; - int64_t N = 2; - int64_t K = 3; - - std::vector AMatrix = {11, -2, 5, - -1, 3, 10}; - std::vector BMatrix = {-13, -2, - 9, 55, - -1, 23}; - float AScale = 0.910f; - std::vector BScale = {1.10f, 1.123f}; - - int8_t AZP = 113; - std::vector BZP = {98, 71}; - - std::vector Bias = {0.10f, 1.123f}; - - test.AddInput("A", {M, K}, AMatrix); - test.AddInput("B", {K, N}, BMatrix); - - test.AddInput("a_scale", {}, {AScale}); - test.AddInput("b_scale", {N}, BScale); - test.AddInput("a_zero_point", {}, {AZP}); - test.AddInput("b_zero_point", {N}, BZP); - test.AddInput("bias", {N}, Bias); - - std::vector expected_vals(M * N); - for (int64_t m = 0; m < M; m++) { - for (int64_t n = 0; n < N; n++) { - float sum = 0.0f; - for (int64_t k = 0; k < K; k++) { - sum += ((AMatrix[m * K + k] - AZP) * AScale) * ((BMatrix[k * N + n] - BZP[n]) * BScale[n]); - } - expected_vals[m * N + n] = sum + Bias[n]; - } - } - - test.AddOutput("Y", {M, N}, expected_vals); - - test.Run(); -} - } // namespace test } // namespace onnxruntime diff --git a/onnxruntime/test/testdata/matmul_integer_to_float.py b/onnxruntime/test/testdata/matmul_integer_to_float.py index ac91877a0ea44..36902598aad14 100644 --- a/onnxruntime/test/testdata/matmul_integer_to_float.py +++ b/onnxruntime/test/testdata/matmul_integer_to_float.py @@ -78,35 +78,7 @@ def GenerateModel(model_name, sign_i, sign_w, output_type_fp16, has_zp=True, bia if __name__ == "__main__": - GenerateModel("matmul_integer_to_float16_int8.onnx", sign_i=False, sign_w=True, output_type_fp16=True) - GenerateModel("matmul_integer_to_float16_uint8.onnx", sign_i=False, sign_w=False, output_type_fp16=True) - GenerateModel( - "matmul_integer_to_float16_int8_bias.onnx", - sign_i=False, - sign_w=True, - output_type_fp16=True, - has_zp=False, - bias=True, - ) - GenerateModel( - "matmul_integer_to_float16_uint8_bias.onnx", - sign_i=False, - sign_w=False, - output_type_fp16=True, - has_zp=False, - bias=True, - ) - - GenerateModel("matmul_integer_to_float16_int8_int8.onnx", sign_i=True, sign_w=True, output_type_fp16=True) - GenerateModel( - "matmul_integer_to_float16_int8_int8_bias.onnx", - sign_i=True, - sign_w=True, - output_type_fp16=True, - has_zp=False, - bias=True, - ) - + #GenerateModel("matmul_integer_to_float16_int8.onnx", sign_i=False, sign_w=True, output_type_fp16=True) GenerateModel("matmul_integer_to_float_int8.onnx", sign_i=False, sign_w=True, output_type_fp16=False) GenerateModel("matmul_integer_to_float_uint8.onnx", sign_i=False, sign_w=False, output_type_fp16=False) GenerateModel( diff --git a/onnxruntime/test/testdata/matmul_integer_to_float16_int8.onnx b/onnxruntime/test/testdata/matmul_integer_to_float16_int8.onnx deleted file mode 100644 index 22293b0d10756..0000000000000 --- a/onnxruntime/test/testdata/matmul_integer_to_float16_int8.onnx +++ /dev/null @@ -1,51 +0,0 @@ - :Ì -U -A -B - a_zero_point - b_zero_pointmatmul_output_int32 MatMulInteger" MatMulInteger -. -a_scale -b_scale -multiplier mul_right"Mul -A -matmul_output_int32matmul_output_floatcast"Cast* -to -  -5 -matmul_output_float - -multiplierY -mul_bottom"MulDynamicQuantizeMatMul_fusionZ -A - - -M -KZ -B - - -K -NZ -a_scale - - - -Z -b_scale -  - -CZ - a_zero_point - - -Z - b_zero_point -  -Cb -Y - - - -M -NB \ No newline at end of file diff --git a/onnxruntime/test/testdata/matmul_integer_to_float16_int8_bias.onnx b/onnxruntime/test/testdata/matmul_integer_to_float16_int8_bias.onnx deleted file mode 100644 index b92648e6ac23c..0000000000000 --- a/onnxruntime/test/testdata/matmul_integer_to_float16_int8_bias.onnx +++ /dev/null @@ -1,49 +0,0 @@ - :Ä -9 -A -Bmatmul_output_int32 MatMulInteger" MatMulInteger -. -a_scale -b_scale -multiplier mul_right"Mul -A -matmul_output_int32matmul_output_floatcast"Cast* -to -  -E -matmul_output_float - -multipliermul_bottom_output -mul_bottom"Mul -& -mul_bottom_output -biasYadd"AddDynamicQuantizeMatMul_fusionZ -A - - -M -KZ -B - - -K -NZ -a_scale - - - -Z -b_scale -  - -CZ -bias -  - -Nb -Y - - - -M -NB \ No newline at end of file diff --git a/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8.onnx b/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8.onnx deleted file mode 100644 index 3bb5129ba0800..0000000000000 --- a/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8.onnx +++ /dev/null @@ -1,51 +0,0 @@ - :Ì -U -A -B - a_zero_point - b_zero_pointmatmul_output_int32 MatMulInteger" MatMulInteger -. -a_scale -b_scale -multiplier mul_right"Mul -A -matmul_output_int32matmul_output_floatcast"Cast* -to -  -5 -matmul_output_float - -multiplierY -mul_bottom"MulDynamicQuantizeMatMul_fusionZ -A - - -M -KZ -B - - -K -NZ -a_scale - - - -Z -b_scale -  - -CZ - a_zero_point - - -Z - b_zero_point -  -Cb -Y - - - -M -NB \ No newline at end of file diff --git a/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8_bias.onnx b/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8_bias.onnx deleted file mode 100644 index 76bf3f698fcee..0000000000000 --- a/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8_bias.onnx +++ /dev/null @@ -1,49 +0,0 @@ - :Ä -9 -A -Bmatmul_output_int32 MatMulInteger" MatMulInteger -. -a_scale -b_scale -multiplier mul_right"Mul -A -matmul_output_int32matmul_output_floatcast"Cast* -to -  -E -matmul_output_float - -multipliermul_bottom_output -mul_bottom"Mul -& -mul_bottom_output -biasYadd"AddDynamicQuantizeMatMul_fusionZ -A - - -M -KZ -B - - -K -NZ -a_scale - - - -Z -b_scale -  - -CZ -bias -  - -Nb -Y - - - -M -NB \ No newline at end of file diff --git a/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float16_int8.onnx b/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float16_int8.onnx deleted file mode 100644 index 22293b0d10756..0000000000000 --- a/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float16_int8.onnx +++ /dev/null @@ -1,51 +0,0 @@ - :Ì -U -A -B - a_zero_point - b_zero_pointmatmul_output_int32 MatMulInteger" MatMulInteger -. -a_scale -b_scale -multiplier mul_right"Mul -A -matmul_output_int32matmul_output_floatcast"Cast* -to -  -5 -matmul_output_float - -multiplierY -mul_bottom"MulDynamicQuantizeMatMul_fusionZ -A - - -M -KZ -B - - -K -NZ -a_scale - - - -Z -b_scale -  - -CZ - a_zero_point - - -Z - b_zero_point -  -Cb -Y - - - -M -NB \ No newline at end of file From b9e5f1544f805db21bedcaa4fa7c0ab465a26fc8 Mon Sep 17 00:00:00 2001 From: Anagha Rao Date: Tue, 30 Jan 2024 12:08:38 -0800 Subject: [PATCH 3/7] Lintrunner --- .../matmul_integer_to_float_test.cc | 39 ++++++++----------- .../test/testdata/matmul_integer_to_float.py | 2 +- 2 files changed, 17 insertions(+), 24 deletions(-) diff --git a/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc b/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc index 49560b8ff268a..b2e17c5333319 100644 --- a/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc +++ b/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc @@ -24,9 +24,7 @@ namespace onnxruntime { namespace test { template -static void CalculateMatMulIntegerToFloat(const int64_t M, const int64_t N, const int64_t K, const std::vector& A_data, const std::vector& A_scale, const std::vector& A_zero_point, const std::vector& B_data, std::vector& B_scale, std::vector& B_zero_point, const - std::vector& Bias, std::vector& Y_data, bool per_column, bool has_zp, bool has_bias) { - +static void CalculateMatMulIntegerToFloat(const int64_t M, const int64_t N, const int64_t K, const std::vector& A_data, const std::vector& A_scale, const std::vector& A_zero_point, const std::vector& B_data, std::vector& B_scale, std::vector& B_zero_point, const std::vector& Bias, std::vector& Y_data, bool per_column, bool has_zp, bool has_bias) { if (!per_column) { B_zero_point.resize(N, B_zero_point[0]); B_scale.resize(N, B_scale[0]); @@ -77,7 +75,7 @@ void TestMatMulIntegerToFloat(bool is_matrix_b_constant, std::numeric_limits::max()); std::transform(tmp_B_data.begin(), tmp_B_data.end(), std::back_inserter(B_data), [](int32_t v) -> WType { - return static_cast(v); + return static_cast(v); }); std::vector A_scale = random.Uniform(AsSpan({1}), -0.1f, 0.1f); @@ -120,22 +118,21 @@ void TestMatMulIntegerToFloat(bool is_matrix_b_constant, std::vector Y_data(M * N); CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, B_data, B_scale, B_zero_point, Bias, Y_data, per_column, has_zp, has_bias); - if ( constexpr(std::is_same_v)) { - test.AddOutput("Y", {M, N}, Y_data); - } else { - test.AddOutput("Y", {M, N}, ToFloat16(Y_data)); - test.SetOutputAbsErr("Y", 0.5f); - } + if (constexpr(std::is_same_v)) { + test.AddOutput("Y", {M, N}, Y_data); + } else { + test.AddOutput("Y", {M, N}, ToFloat16(Y_data)); + test.SetOutputAbsErr("Y", 0.5f); + } // Only DML EP supports these data type combinations for now if ((constexpr(std::is_same_v)) || (constexpr(std::is_same_v) && - /*(constexpr(std::is_same_v) &&*/ !constexpr(std::is_same_v)) - ) { + !constexpr(std::is_same_v))) { std::vector> execution_providers; execution_providers.push_back(DefaultDmlExecutionProvider()); test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); - } else { + } else { test.Run(); } } @@ -191,7 +188,6 @@ TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8U8) { RunMatMulIntegerToFloatTest(); } - TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8U8) { RunMatMulIntegerToFloatTest(); } @@ -200,7 +196,6 @@ TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_U8U8) { RunMatMulIntegerToFloatTest(); } - TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8X8) { RunMatMulIntegerToFloatTest(); } @@ -247,7 +242,7 @@ TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_U8U8) { int64_t K = 2; std::vector A_data = {1, 5, 2, 1, 9, - 1, 1, 3, 7, 2}; + 1, 1, 3, 7, 2}; std::vector B_data = {3, 7, 2, 1, 1, 2, 1, 9, 1, 1}; std::vector A_scale = ToFloat16({3.0f}); @@ -257,7 +252,6 @@ TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_U8U8) { std::vector A_zero_point = {3}; std::vector B_zero_point = {5}; - test.AddInput("a_scale", {1}, A_scale); test.AddInput("b_scale", {1}, B_scale); test.AddInput("a_zero_point", {1}, A_zero_point); @@ -281,7 +275,7 @@ TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_U8S8) { std::vector A_data = {3, 7, 2, 1, 1, 2, 1, 9, 1, 1}; std::vector B_data = {2, -1, -9, 1, 1, - -1, 0, -3, 1, -4}; + -1, 0, -3, 1, -4}; std::vector A_scale = ToFloat16({-4.0f}); std::vector B_scale = ToFloat16({2.0f}); test.AddInput("A", {M, K}, A_data); @@ -312,7 +306,7 @@ TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_S8S8) { int64_t K = 2; std::vector A_data = {3, 7, -2, 1, 1, - 2, -1, -9, 1, 1}; + 2, -1, -9, 1, 1}; std::vector B_data = {2, -1, -9, 1, 1, -1, 0, -3, 1, -4}; std::vector A_scale = ToFloat16({-4.0f}); @@ -329,7 +323,6 @@ TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_S8S8) { test.AddInput("b_zero_point", {1}, B_zero_point); test.AddInput("bias", {N}, Bias); - std::vector Y_data(M * N); CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, B_data, B_scale, B_zero_point, Bias, Y_data, false, true, true); @@ -381,10 +374,10 @@ TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16) { int64_t K = 3; std::vector A_data = {11, -2, 5, - -1, 3, 10}; + -1, 3, 10}; std::vector B_data = {-13, -2, - 9, 55, - -1, 23}; + 9, 55, + -1, 23}; std::vector A_scale = ToFloat16({0.910f}); std::vector B_scale = ToFloat16({1.10f, 1.123f}); diff --git a/onnxruntime/test/testdata/matmul_integer_to_float.py b/onnxruntime/test/testdata/matmul_integer_to_float.py index 36902598aad14..0c9ee3f3e6492 100644 --- a/onnxruntime/test/testdata/matmul_integer_to_float.py +++ b/onnxruntime/test/testdata/matmul_integer_to_float.py @@ -78,7 +78,7 @@ def GenerateModel(model_name, sign_i, sign_w, output_type_fp16, has_zp=True, bia if __name__ == "__main__": - #GenerateModel("matmul_integer_to_float16_int8.onnx", sign_i=False, sign_w=True, output_type_fp16=True) + # GenerateModel("matmul_integer_to_float16_int8.onnx", sign_i=False, sign_w=True, output_type_fp16=True) GenerateModel("matmul_integer_to_float_int8.onnx", sign_i=False, sign_w=True, output_type_fp16=False) GenerateModel("matmul_integer_to_float_uint8.onnx", sign_i=False, sign_w=False, output_type_fp16=False) GenerateModel( From 453fa9ef3df9e4658a97d4c3882ca391e9633525 Mon Sep 17 00:00:00 2001 From: Anagha Rao Date: Tue, 30 Jan 2024 16:34:24 -0800 Subject: [PATCH 4/7] add matmul_integer_to_float16_int8.onnx for graph transformer test and line characters update --- .../matmul_integer_to_float_test.cc | 37 ++++++++++---- .../test/testdata/matmul_integer_to_float.py | 2 +- .../matmul_integer_to_float16_int8.onnx | 51 +++++++++++++++++++ 3 files changed, 80 insertions(+), 10 deletions(-) create mode 100644 onnxruntime/test/testdata/matmul_integer_to_float16_int8.onnx diff --git a/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc b/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc index b2e17c5333319..dc466c933c6d7 100644 --- a/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc +++ b/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc @@ -24,7 +24,12 @@ namespace onnxruntime { namespace test { template -static void CalculateMatMulIntegerToFloat(const int64_t M, const int64_t N, const int64_t K, const std::vector& A_data, const std::vector& A_scale, const std::vector& A_zero_point, const std::vector& B_data, std::vector& B_scale, std::vector& B_zero_point, const std::vector& Bias, std::vector& Y_data, bool per_column, bool has_zp, bool has_bias) { +static void CalculateMatMulIntegerToFloat(const int64_t M, const int64_t N, const int64_t K, + const std::vector& A_data, const std::vector& A_scale, + const std::vector& A_zero_point, const std::vector& B_data, + std::vector& B_scale, std::vector& B_zero_point, + const std::vector& Bias, std::vector& Y_data, + bool per_column, bool has_zp, bool has_bias) { if (!per_column) { B_zero_point.resize(N, B_zero_point[0]); B_scale.resize(N, B_scale[0]); @@ -34,8 +39,10 @@ static void CalculateMatMulIntegerToFloat(const int64_t M, const int64_t N, cons for (int64_t n = 0; n < N; n++) { float sum = 0.0f; for (int64_t k = 0; k < K; k++) { - float A_dequantized = has_zp ? (A_data[m * K + k] - A_zero_point[0]) * A_scale[0] : A_data[m * K + k] * A_scale[0]; - float B_dequantized = has_zp ? (B_data[k * N + n] - B_zero_point[n]) * B_scale[n] : B_data[k * N + n] * B_scale[n]; + float A_dequantized = has_zp ? + (A_data[m * K + k] - A_zero_point[0]) * A_scale[0] : A_data[m * K + k] * A_scale[0]; + float B_dequantized = has_zp ? + (B_data[k * N + n] - B_zero_point[n]) * B_scale[n] : B_data[k * N + n] * B_scale[n]; sum += A_dequantized * B_dequantized; } @@ -116,7 +123,9 @@ void TestMatMulIntegerToFloat(bool is_matrix_b_constant, } std::vector Y_data(M * N); - CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, B_data, B_scale, B_zero_point, Bias, Y_data, per_column, has_zp, has_bias); + CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, + B_data, B_scale, B_zero_point, Bias, Y_data, + per_column, has_zp, has_bias); if (constexpr(std::is_same_v)) { test.AddOutput("Y", {M, N}, Y_data); @@ -258,7 +267,9 @@ TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_U8U8) { test.AddInput("b_zero_point", {1}, B_zero_point); std::vector Y_data(M * N); - CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, B_data, B_scale, B_zero_point, {}, Y_data, false, true, false); + CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, + B_data, B_scale, B_zero_point, {}, Y_data, + false, true, false); test.AddOutput("Y", {M, N}, ToFloat16(Y_data)); std::vector> execution_providers; @@ -290,7 +301,9 @@ TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_U8S8) { test.AddInput("b_zero_point", {1}, B_zero_point); std::vector Y_data(M * N); - CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, B_data, B_scale, B_zero_point, {}, Y_data, false, true, false); + CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, + B_data, B_scale, B_zero_point, {}, Y_data, + false, true, false); test.AddOutput("Y", {M, N}, ToFloat16(Y_data)); @@ -324,7 +337,9 @@ TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_S8S8) { test.AddInput("bias", {N}, Bias); std::vector Y_data(M * N); - CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, B_data, B_scale, B_zero_point, Bias, Y_data, false, true, true); + CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, + B_data, B_scale, B_zero_point, Bias, Y_data, + false, true, true); test.AddOutput("Y", {M, N}, ToFloat16(Y_data)); @@ -358,7 +373,9 @@ TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_S8U8) { test.AddInput("bias", {N}, Bias); std::vector Y_data(M * N); - CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, B_data, B_scale, B_zero_point, Bias, Y_data, false, true, true); + CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, + B_data, B_scale, B_zero_point, Bias, Y_data, + false, true, true); test.AddOutput("Y", {M, N}, ToFloat16(Y_data)); @@ -396,7 +413,9 @@ TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16) { test.AddInput("bias", {N}, Bias); std::vector Y_data(M * N); - CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, B_data, B_scale, B_zero_point, Bias, Y_data, true, true, true); + CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, + B_data, B_scale, B_zero_point, Bias, Y_data, + true, true, true); test.AddOutput("Y", {M, N}, ToFloat16(Y_data)); test.SetOutputRelErr("Y", 2e-2f); diff --git a/onnxruntime/test/testdata/matmul_integer_to_float.py b/onnxruntime/test/testdata/matmul_integer_to_float.py index 0c9ee3f3e6492..e6c51009018f9 100644 --- a/onnxruntime/test/testdata/matmul_integer_to_float.py +++ b/onnxruntime/test/testdata/matmul_integer_to_float.py @@ -78,7 +78,7 @@ def GenerateModel(model_name, sign_i, sign_w, output_type_fp16, has_zp=True, bia if __name__ == "__main__": - # GenerateModel("matmul_integer_to_float16_int8.onnx", sign_i=False, sign_w=True, output_type_fp16=True) + GenerateModel("matmul_integer_to_float16_int8.onnx", sign_i=False, sign_w=True, output_type_fp16=True) GenerateModel("matmul_integer_to_float_int8.onnx", sign_i=False, sign_w=True, output_type_fp16=False) GenerateModel("matmul_integer_to_float_uint8.onnx", sign_i=False, sign_w=False, output_type_fp16=False) GenerateModel( diff --git a/onnxruntime/test/testdata/matmul_integer_to_float16_int8.onnx b/onnxruntime/test/testdata/matmul_integer_to_float16_int8.onnx new file mode 100644 index 0000000000000..22293b0d10756 --- /dev/null +++ b/onnxruntime/test/testdata/matmul_integer_to_float16_int8.onnx @@ -0,0 +1,51 @@ + :Ì +U +A +B + a_zero_point + b_zero_pointmatmul_output_int32 MatMulInteger" MatMulInteger +. +a_scale +b_scale +multiplier mul_right"Mul +A +matmul_output_int32matmul_output_floatcast"Cast* +to +  +5 +matmul_output_float + +multiplierY +mul_bottom"MulDynamicQuantizeMatMul_fusionZ +A + + +M +KZ +B + + +K +NZ +a_scale + + + +Z +b_scale +  + +CZ + a_zero_point + + +Z + b_zero_point +  +Cb +Y + + + +M +NB \ No newline at end of file From af2d24d4f36b70f0f84cfd2aeb449ef83e32a41b Mon Sep 17 00:00:00 2001 From: Anagha Rao Date: Thu, 8 Feb 2024 09:43:21 -0800 Subject: [PATCH 5/7] Avoid Overflow condistions --- .../test/contrib_ops/matmul_integer_to_float_test.cc | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc b/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc index dc466c933c6d7..adb24dc75f375 100644 --- a/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc +++ b/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc @@ -40,9 +40,11 @@ static void CalculateMatMulIntegerToFloat(const int64_t M, const int64_t N, cons float sum = 0.0f; for (int64_t k = 0; k < K; k++) { float A_dequantized = has_zp ? - (A_data[m * K + k] - A_zero_point[0]) * A_scale[0] : A_data[m * K + k] * A_scale[0]; + (static_cast(A_data[m * K + k]) - static_cast(A_zero_point[0])) * A_scale[0] : + A_data[m * K + k] * A_scale[0]; float B_dequantized = has_zp ? - (B_data[k * N + n] - B_zero_point[n]) * B_scale[n] : B_data[k * N + n] * B_scale[n]; + (static_cast(B_data[k * N + n]) - static_cast(B_zero_point[n])) * B_scale[n] : + B_data[k * N + n] * B_scale[n]; sum += A_dequantized * B_dequantized; } @@ -258,8 +260,8 @@ TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_U8U8) { std::vector B_scale = ToFloat16({2.0f}); test.AddInput("A", {M, K}, A_data); test.AddInput("B", {K, N}, B_data); - std::vector A_zero_point = {3}; - std::vector B_zero_point = {5}; + std::vector A_zero_point = {1}; + std::vector B_zero_point = {1}; test.AddInput("a_scale", {1}, A_scale); test.AddInput("b_scale", {1}, B_scale); @@ -363,7 +365,7 @@ TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_S8U8) { test.AddInput("A", {M, K}, A_data); test.AddInput("B", {K, N}, B_data); std::vector A_zero_point = {-1}; - std::vector B_zero_point = {3}; + std::vector B_zero_point = {1}; std::vector Bias = ToFloat16({11.0f, -17.0f, 1.0f, -3.0f, 12.0f}); test.AddInput("a_scale", {1}, A_scale); From d3acbaca7d4673bcdbebc799c4d23d8572313dc1 Mon Sep 17 00:00:00 2001 From: Anagha Rao Date: Thu, 8 Feb 2024 12:17:20 -0800 Subject: [PATCH 6/7] Avoid saturation for U8S8 CPU testcases --- .../matmul_integer_to_float_test.cc | 46 ++++++++++--------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc b/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc index adb24dc75f375..dc92068134c67 100644 --- a/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc +++ b/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc @@ -79,10 +79,11 @@ void TestMatMulIntegerToFloat(bool is_matrix_b_constant, std::vector B_data; - std::vector tmp_B_data = random.Uniform(B_dims, - std::numeric_limits::lowest(), - std::numeric_limits::max()); - + std::vector tmp_B_data; + tmp_B_data = random.Uniform(B_dims, + (constexpr(std::is_same_v)) ? + std::numeric_limits::lowest()/2 : std::numeric_limits::lowest(), + std::numeric_limits::max() / 2); std::transform(tmp_B_data.begin(), tmp_B_data.end(), std::back_inserter(B_data), [](int32_t v) -> WType { return static_cast(v); }); @@ -139,7 +140,8 @@ void TestMatMulIntegerToFloat(bool is_matrix_b_constant, // Only DML EP supports these data type combinations for now if ((constexpr(std::is_same_v)) || (constexpr(std::is_same_v) && - !constexpr(std::is_same_v))) { + constexpr(std::is_same_v) && + constexpr(std::is_same_v))) { std::vector> execution_providers; execution_providers.push_back(DefaultDmlExecutionProvider()); test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); @@ -211,7 +213,23 @@ TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8X8) { RunMatMulIntegerToFloatTest(); } -// DML EP supports Float16 output type and A Matrix and B Matric of different data types for Float32 output +TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8S8) { + RunMatMulIntegerToFloatTest(); +} + +TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8S8) { + RunMatMulIntegerToFloatTest(); +} + +TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_U8S8) { + RunMatMulIntegerToFloatTest(); +} + +TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8S8) { + RunMatMulIntegerToFloatTest(); +} + +// DML EP supports Float16 output type and Signed A Matrix and Unsigned B Matric for Float32 output #if defined(USE_DML) TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8U8) { @@ -230,22 +248,6 @@ TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_S8U8) { RunMatMulIntegerToFloatTest(); } -TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8S8) { - RunMatMulIntegerToFloatTest(); -} - -TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8S8) { - RunMatMulIntegerToFloatTest(); -} - -TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_U8S8) { - RunMatMulIntegerToFloatTest(); -} - -TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8S8) { - RunMatMulIntegerToFloatTest(); -} - TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_U8U8) { OpTester test("MatMulIntegerToFloat", 1, kMSDomain); int64_t M = 5; From 8925c191d90e3e570f5c6d9f6d9cff1a1c9f5b92 Mon Sep 17 00:00:00 2001 From: Anagha Rao Date: Thu, 8 Feb 2024 17:36:43 -0800 Subject: [PATCH 7/7] move matmul_integer_to_float16_int8.onnx --- .../{ => transform/fusion}/matmul_integer_to_float16_int8.onnx | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename onnxruntime/test/testdata/{ => transform/fusion}/matmul_integer_to_float16_int8.onnx (100%) diff --git a/onnxruntime/test/testdata/matmul_integer_to_float16_int8.onnx b/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float16_int8.onnx similarity index 100% rename from onnxruntime/test/testdata/matmul_integer_to_float16_int8.onnx rename to onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float16_int8.onnx