diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/External/DirectMLHelpers/ApiTraits.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/External/DirectMLHelpers/ApiTraits.h index a7a6e59e400ef..3c0f49f3d2d49 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/External/DirectMLHelpers/ApiTraits.h +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/External/DirectMLHelpers/ApiTraits.h @@ -882,7 +882,7 @@ struct OperatorDescTraits template <> struct OperatorDescTraits { - static constexpr DML_OPERATOR_TYPE Type = (DML_OPERATOR_TYPE) DML_OPERATOR_MATRIX_MULTIPLY_INTEGER_TO_FLOAT; + static constexpr DML_OPERATOR_TYPE Type = DML_OPERATOR_MATRIX_MULTIPLY_INTEGER_TO_FLOAT; }; template <> diff --git a/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h b/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h index 7146edd861257..d5a66a74237ee 100644 --- a/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h +++ b/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h @@ -826,14 +826,6 @@ class QLinearMatMulHelper : public MatMulHelperBase QLinearMatMulHelper(const Info_t& info, const Shape_t& shape) : MatMulHelperBase(info, shape, 0, 3) {} }; -class MatMulIntegerToFloatHelper : public MatMulHelperBase -{ -public: - template - MatMulIntegerToFloatHelper(const Info_t& info, const Shape_t& shape) : MatMulHelperBase(info, shape, 0, 1) {} -}; - - class TopKHelper { void Initialize( @@ -1752,7 +1744,7 @@ using ShapeInferenceHelper_Identity16 = GetOutputShapeAsInputShapeHelper; using ShapeInferenceHelper_MatMul = MatMulHelper; using ShapeInferenceHelper_MatMulInteger = MatMulHelper; using ShapeInferenceHelper_DynamicQuantizeMatMul = MatMulHelper; -using ShapeInferenceHelper_MatMulIntegerToFloat = MatMulIntegerToFloatHelper; +using ShapeInferenceHelper_MatMulIntegerToFloat = MatMulHelper; using ShapeInferenceHelper_QLinearMatMul = QLinearMatMulHelper; using 
ShapeInferenceHelper_QLinearAdd = GetBroadcastedOutputShapeHelper; using ShapeInferenceHelper_DynamicQuantizeLinear = GetOutputShapeAsInputShapeHelper; diff --git a/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc b/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc index d1883815c1a6f..dc92068134c67 100644 --- a/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc +++ b/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc @@ -24,28 +24,66 @@ namespace onnxruntime { namespace test { template -void TestMatMulIntegerToFloat(const std::vector& A_dims, - std::vector B_dims, - const std::string& reference_model, - bool is_matrix_b_constant, +static void CalculateMatMulIntegerToFloat(const int64_t M, const int64_t N, const int64_t K, + const std::vector& A_data, const std::vector& A_scale, + const std::vector& A_zero_point, const std::vector& B_data, + std::vector& B_scale, std::vector& B_zero_point, + const std::vector& Bias, std::vector& Y_data, + bool per_column, bool has_zp, bool has_bias) { + if (!per_column) { + B_zero_point.resize(N, B_zero_point[0]); + B_scale.resize(N, B_scale[0]); + } + + for (int64_t m = 0; m < M; m++) { + for (int64_t n = 0; n < N; n++) { + float sum = 0.0f; + for (int64_t k = 0; k < K; k++) { + float A_dequantized = has_zp ? + (static_cast(A_data[m * K + k]) - static_cast(A_zero_point[0])) * A_scale[0] : + A_data[m * K + k] * A_scale[0]; + float B_dequantized = has_zp ? 
+ (static_cast(B_data[k * N + n]) - static_cast(B_zero_point[n])) * B_scale[n] : + B_data[k * N + n] * B_scale[n]; + + sum += A_dequantized * B_dequantized; + } + if (has_bias) { + sum += Bias[n]; + } + Y_data[m * N + n] = static_cast(sum); + } + } +} + +template +void TestMatMulIntegerToFloat(bool is_matrix_b_constant, bool per_column = false, bool has_zp = true, bool has_bias = false) { // create rand inputs RandomValueGenerator random{}; - + int64_t M = 4; + int64_t N = 128; + int64_t K = 128; + std::vector A_dims{M, K}; + std::vector B_dims{K, N}; + std::vector Y_dims{M, K}; std::vector A_data; - std::vector tmp_A_data = random.Uniform(A_dims, - std::numeric_limits::lowest(), - std::numeric_limits::max()); - std::transform(tmp_A_data.begin(), tmp_A_data.end(), std::back_inserter(A_data), [](int32_t v) -> WType { + std::vector tmp_A_data = random.Uniform(A_dims, + std::numeric_limits::lowest(), + std::numeric_limits::max()); + std::transform(tmp_A_data.begin(), tmp_A_data.end(), std::back_inserter(A_data), [](int32_t v) -> IType { return static_cast(v); }); std::vector B_data; - std::vector tmp_B_data = random.Uniform(B_dims, - std::numeric_limits::lowest(), - std::numeric_limits::max()); + + std::vector tmp_B_data; + tmp_B_data = random.Uniform(B_dims, + (constexpr(std::is_same_v)) ? 
+ std::numeric_limits::lowest()/2 : std::numeric_limits::lowest(), + std::numeric_limits::max() / 2); std::transform(tmp_B_data.begin(), tmp_B_data.end(), std::back_inserter(B_data), [](int32_t v) -> WType { return static_cast(v); }); @@ -60,9 +98,9 @@ void TestMatMulIntegerToFloat(const std::vector& A_dims, std::for_each(B_zero_point.begin(), B_zero_point.end(), [&random](WType& zp) { - zp = static_cast(random.Uniform(std::array{1}, - std::numeric_limits::lowest(), - std::numeric_limits::max())[0]); + zp = static_cast(random.Uniform(std::array{1}, + std::numeric_limits::lowest(), + std::numeric_limits::max())[0]); }); std::vector Bias = random.Uniform(AsSpan({B_dims.back()}), -0.1f, 0.1f); @@ -77,7 +115,7 @@ void TestMatMulIntegerToFloat(const std::vector& A_dims, test.AddInput("a_zero_point", {1}, A_zero_point); test.AddInput("b_zero_point", {b_scale_zp_size}, B_zero_point); } else { - test.AddOptionalInputEdge(); + test.AddOptionalInputEdge(); test.AddOptionalInputEdge(); } @@ -87,34 +125,34 @@ void TestMatMulIntegerToFloat(const std::vector& A_dims, test.AddOptionalInputEdge(); } - test.AddReferenceOutputs(reference_model); -#if defined(USE_DML) - if constexpr (std::is_same_v) { - test.SetOutputRelErr("Y", 2e-2f); + std::vector Y_data(M * N); + CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, + B_data, B_scale, B_zero_point, Bias, Y_data, + per_column, has_zp, has_bias); + + if (constexpr(std::is_same_v)) { + test.AddOutput("Y", {M, N}, Y_data); } else { - test.SetOutputRelErr("Y", 2.0f); + test.AddOutput("Y", {M, N}, ToFloat16(Y_data)); + test.SetOutputAbsErr("Y", 0.5f); } -#else - test.SetOutputRelErr("Y", 1e-4f); -#endif - if constexpr (std::is_same_v) { - test.Run(); + // Only DML EP supports these data type combinations for now + if ((constexpr(std::is_same_v)) || + (constexpr(std::is_same_v) && + constexpr(std::is_same_v) && + constexpr(std::is_same_v))) { + std::vector> execution_providers; + 
execution_providers.push_back(DefaultDmlExecutionProvider()); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); } else { - test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCpuExecutionProvider}); + test.Run(); } } template -void RunMatMulIntegerToFloatTest(const string& model_path) { - std::vector A_dims{4, 128}; - std::vector B_dims{128, 128}; - std::vector Y_dims{4, 128}; - +void RunMatMulIntegerToFloatTest() { TestMatMulIntegerToFloat( - A_dims, - B_dims, - model_path, false, /*is_matrix_b_constant*/ false, /*per_column*/ HasZeroPoint, /*has_zp*/ @@ -122,9 +160,6 @@ void RunMatMulIntegerToFloatTest(const string& model_path) { ); TestMatMulIntegerToFloat( - A_dims, - B_dims, - model_path, true, /*is_matrix_b_constant*/ false, /*per_column*/ HasZeroPoint, /*has_zp*/ @@ -132,9 +167,6 @@ void RunMatMulIntegerToFloatTest(const string& model_path) { ); TestMatMulIntegerToFloat( - A_dims, - B_dims, - model_path, false, /*is_matrix_b_constant*/ true, /*per_column*/ HasZeroPoint, /*has_zp*/ @@ -142,9 +174,6 @@ void RunMatMulIntegerToFloatTest(const string& model_path) { ); TestMatMulIntegerToFloat( - A_dims, - B_dims, - model_path, true, /*is_matrix_b_constant*/ true, /*per_column*/ HasZeroPoint, /*has_zp*/ @@ -152,44 +181,254 @@ void RunMatMulIntegerToFloatTest(const string& model_path) { ); } -#if USE_DML -TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8X8_FP16) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8.onnx"); - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_uint8.onnx"); +TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8S8) { + RunMatMulIntegerToFloatTest(); } -TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8X8_FP16) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_bias.onnx"); - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_uint8_bias.onnx"); +TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_S8S8) { + 
RunMatMulIntegerToFloatTest(); } -TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8S8_FP16) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_int8.onnx"); +TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_S8S8) { + RunMatMulIntegerToFloatTest(); } -TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_S8S8_FP16) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_int8_bias.onnx"); +TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_S8S8) { + RunMatMulIntegerToFloatTest(); } -#endif // USE_DML -TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8X8) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8.onnx"); - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_uint8.onnx"); +TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8U8) { + RunMatMulIntegerToFloatTest(); } -TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8X8) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8_bias.onnx"); - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_uint8_bias.onnx"); +TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8U8) { + RunMatMulIntegerToFloatTest(); } -TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8S8) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8_int8.onnx"); +TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_U8U8) { + RunMatMulIntegerToFloatTest(); } -TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_S8S8) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8_int8_bias.onnx"); +TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8X8) { + RunMatMulIntegerToFloatTest(); +} + +TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8S8) { + RunMatMulIntegerToFloatTest(); +} + +TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8S8) { + RunMatMulIntegerToFloatTest(); +} + +TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_U8S8) { + RunMatMulIntegerToFloatTest(); +} + +TEST(MatMulIntegerToFloat, 
HasZeroPoint_HasBias_test_U8S8) { + RunMatMulIntegerToFloatTest(); +} + +// DML EP supports Float16 output type and Signed A Matrix and Unsigned B Matrix for Float32 output +#if defined(USE_DML) + +TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8U8) { + RunMatMulIntegerToFloatTest(); +} + +TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_S8U8) { + RunMatMulIntegerToFloatTest(); +} + +TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_S8U8) { + RunMatMulIntegerToFloatTest(); +} + +TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_S8U8) { + RunMatMulIntegerToFloatTest(); +} + +TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_U8U8) { + OpTester test("MatMulIntegerToFloat", 1, kMSDomain); + int64_t M = 5; + int64_t N = 5; + int64_t K = 2; + + std::vector A_data = {1, 5, 2, 1, 9, + 1, 1, 3, 7, 2}; + std::vector B_data = {3, 7, 2, 1, 1, + 2, 1, 9, 1, 1}; + std::vector A_scale = ToFloat16({3.0f}); + std::vector B_scale = ToFloat16({2.0f}); + test.AddInput("A", {M, K}, A_data); + test.AddInput("B", {K, N}, B_data); + std::vector A_zero_point = {1}; + std::vector B_zero_point = {1}; + + test.AddInput("a_scale", {1}, A_scale); + test.AddInput("b_scale", {1}, B_scale); + test.AddInput("a_zero_point", {1}, A_zero_point); + test.AddInput("b_zero_point", {1}, B_zero_point); + + std::vector Y_data(M * N); + CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, + B_data, B_scale, B_zero_point, {}, Y_data, + false, true, false); + + test.AddOutput("Y", {M, N}, ToFloat16(Y_data)); + std::vector> execution_providers; + execution_providers.push_back(DefaultDmlExecutionProvider()); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); +} + +TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_U8S8) { + OpTester test("MatMulIntegerToFloat", 1, kMSDomain); + int64_t M = 5; + int64_t N = 5; + int64_t K = 2; + + std::vector A_data = {3, 7, 2, 1, 1, + 2, 1, 9, 1, 1}; + std::vector B_data = {2, -1, -9, 1, 1, + -1, 0, -3, 1, 
-4}; + std::vector A_scale = ToFloat16({-4.0f}); + std::vector B_scale = ToFloat16({2.0f}); + test.AddInput("A", {M, K}, A_data); + test.AddInput("B", {K, N}, B_data); + std::vector A_zero_point = {1}; + std::vector B_zero_point = {3}; + std::vector Bias = ToFloat16({11.0f, -17.0f, 1.0f, -3.0f, 12.0f}); + + test.AddInput("a_scale", {1}, A_scale); + test.AddInput("b_scale", {1}, B_scale); + test.AddInput("a_zero_point", {1}, A_zero_point); + test.AddInput("b_zero_point", {1}, B_zero_point); + + std::vector Y_data(M * N); + CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, + B_data, B_scale, B_zero_point, {}, Y_data, + false, true, false); + + test.AddOutput("Y", {M, N}, ToFloat16(Y_data)); + + std::vector> execution_providers; + execution_providers.push_back(DefaultDmlExecutionProvider()); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); +} + +TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_S8S8) { + OpTester test("MatMulIntegerToFloat", 1, kMSDomain); + int64_t M = 5; + int64_t N = 5; + int64_t K = 2; + + std::vector A_data = {3, 7, -2, 1, 1, + 2, -1, -9, 1, 1}; + std::vector B_data = {2, -1, -9, 1, 1, + -1, 0, -3, 1, -4}; + std::vector A_scale = ToFloat16({-4.0f}); + std::vector B_scale = ToFloat16({2.0f}); + test.AddInput("A", {M, K}, A_data); + test.AddInput("B", {K, N}, B_data); + std::vector A_zero_point = {-1}; + std::vector B_zero_point = {3}; + std::vector Bias = ToFloat16({11.0f, -17.0f, 1.0f, -3.0f, 12.0f}); + + test.AddInput("a_scale", {1}, A_scale); + test.AddInput("b_scale", {1}, B_scale); + test.AddInput("a_zero_point", {1}, A_zero_point); + test.AddInput("b_zero_point", {1}, B_zero_point); + test.AddInput("bias", {N}, Bias); + + std::vector Y_data(M * N); + CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, + B_data, B_scale, B_zero_point, Bias, Y_data, + false, true, true); + + test.AddOutput("Y", {M, N}, ToFloat16(Y_data)); + + std::vector> execution_providers; + 
execution_providers.push_back(DefaultDmlExecutionProvider()); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); +} + +TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_S8U8) { + OpTester test("MatMulIntegerToFloat", 1, kMSDomain); + int64_t M = 5; + int64_t N = 5; + int64_t K = 2; + + std::vector A_data = {3, 7, -2, 1, 1, + 2, -1, -9, 1, 1}; + std::vector B_data = {3, 7, 2, 1, 1, + 2, 1, 9, 1, 1}; + std::vector A_scale = ToFloat16({-4.0f}); + std::vector B_scale = ToFloat16({2.0f}); + test.AddInput("A", {M, K}, A_data); + test.AddInput("B", {K, N}, B_data); + std::vector A_zero_point = {-1}; + std::vector B_zero_point = {1}; + std::vector Bias = ToFloat16({11.0f, -17.0f, 1.0f, -3.0f, 12.0f}); + + test.AddInput("a_scale", {1}, A_scale); + test.AddInput("b_scale", {1}, B_scale); + test.AddInput("a_zero_point", {1}, A_zero_point); + test.AddInput("b_zero_point", {1}, B_zero_point); + test.AddInput("bias", {N}, Bias); + + std::vector Y_data(M * N); + CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, + B_data, B_scale, B_zero_point, Bias, Y_data, + false, true, true); + + test.AddOutput("Y", {M, N}, ToFloat16(Y_data)); + + std::vector> execution_providers; + execution_providers.push_back(DefaultDmlExecutionProvider()); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); } +TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16) { + OpTester test("MatMulIntegerToFloat", 1, kMSDomain); + int64_t M = 2; + int64_t N = 2; + int64_t K = 3; + + std::vector A_data = {11, -2, 5, + -1, 3, 10}; + std::vector B_data = {-13, -2, + 9, 55, + -1, 23}; + std::vector A_scale = ToFloat16({0.910f}); + std::vector B_scale = ToFloat16({1.10f, 1.123f}); + + std::vector A_zero_point = {113}; + std::vector B_zero_point = {98, 71}; + + std::vector Bias = ToFloat16({0.10f, 1.123f}); + + test.AddInput("A", {M, K}, A_data); + test.AddInput("B", {K, N}, B_data); + + test.AddInput("a_scale", {}, 
{A_scale}); + test.AddInput("b_scale", {N}, B_scale); + test.AddInput("a_zero_point", {}, {A_zero_point}); + test.AddInput("b_zero_point", {N}, B_zero_point); + test.AddInput("bias", {N}, Bias); + + std::vector Y_data(M * N); + CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, + B_data, B_scale, B_zero_point, Bias, Y_data, + true, true, true); + + test.AddOutput("Y", {M, N}, ToFloat16(Y_data)); + test.SetOutputRelErr("Y", 2e-2f); + std::vector> execution_providers; + execution_providers.push_back(DefaultDmlExecutionProvider()); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); +} +#endif + TEST(MatMulIntegerToFloat, MatMulInteger_With_ZeroPoint) { auto test_case = [&](const std::vector& input_shape, const std::vector& weights_shape, diff --git a/onnxruntime/test/testdata/matmul_integer_to_float.py b/onnxruntime/test/testdata/matmul_integer_to_float.py index ac91877a0ea44..e6c51009018f9 100644 --- a/onnxruntime/test/testdata/matmul_integer_to_float.py +++ b/onnxruntime/test/testdata/matmul_integer_to_float.py @@ -79,34 +79,6 @@ def GenerateModel(model_name, sign_i, sign_w, output_type_fp16, has_zp=True, bia if __name__ == "__main__": GenerateModel("matmul_integer_to_float16_int8.onnx", sign_i=False, sign_w=True, output_type_fp16=True) - GenerateModel("matmul_integer_to_float16_uint8.onnx", sign_i=False, sign_w=False, output_type_fp16=True) - GenerateModel( - "matmul_integer_to_float16_int8_bias.onnx", - sign_i=False, - sign_w=True, - output_type_fp16=True, - has_zp=False, - bias=True, - ) - GenerateModel( - "matmul_integer_to_float16_uint8_bias.onnx", - sign_i=False, - sign_w=False, - output_type_fp16=True, - has_zp=False, - bias=True, - ) - - GenerateModel("matmul_integer_to_float16_int8_int8.onnx", sign_i=True, sign_w=True, output_type_fp16=True) - GenerateModel( - "matmul_integer_to_float16_int8_int8_bias.onnx", - sign_i=True, - sign_w=True, - output_type_fp16=True, - has_zp=False, - bias=True, - ) - 
GenerateModel("matmul_integer_to_float_int8.onnx", sign_i=False, sign_w=True, output_type_fp16=False) GenerateModel("matmul_integer_to_float_uint8.onnx", sign_i=False, sign_w=False, output_type_fp16=False) GenerateModel( diff --git a/onnxruntime/test/testdata/matmul_integer_to_float16_int8.onnx b/onnxruntime/test/testdata/matmul_integer_to_float16_int8.onnx deleted file mode 100644 index 22293b0d10756..0000000000000 --- a/onnxruntime/test/testdata/matmul_integer_to_float16_int8.onnx +++ /dev/null @@ -1,51 +0,0 @@ - :Ì -U -A -B - a_zero_point - b_zero_pointmatmul_output_int32 MatMulInteger" MatMulInteger -. -a_scale -b_scale -multiplier mul_right"Mul -A -matmul_output_int32matmul_output_floatcast"Cast* -to -  -5 -matmul_output_float - -multiplierY -mul_bottom"MulDynamicQuantizeMatMul_fusionZ -A - - -M -KZ -B - - -K -NZ -a_scale - - - -Z -b_scale -  - -CZ - a_zero_point - - -Z - b_zero_point -  -Cb -Y - - - -M -NB \ No newline at end of file diff --git a/onnxruntime/test/testdata/matmul_integer_to_float16_int8_bias.onnx b/onnxruntime/test/testdata/matmul_integer_to_float16_int8_bias.onnx deleted file mode 100644 index b92648e6ac23c..0000000000000 --- a/onnxruntime/test/testdata/matmul_integer_to_float16_int8_bias.onnx +++ /dev/null @@ -1,49 +0,0 @@ - :Ä -9 -A -Bmatmul_output_int32 MatMulInteger" MatMulInteger -. 
-a_scale -b_scale -multiplier mul_right"Mul -A -matmul_output_int32matmul_output_floatcast"Cast* -to -  -E -matmul_output_float - -multipliermul_bottom_output -mul_bottom"Mul -& -mul_bottom_output -biasYadd"AddDynamicQuantizeMatMul_fusionZ -A - - -M -KZ -B - - -K -NZ -a_scale - - - -Z -b_scale -  - -CZ -bias -  - -Nb -Y - - - -M -NB \ No newline at end of file diff --git a/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8.onnx b/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8.onnx deleted file mode 100644 index 3bb5129ba0800..0000000000000 --- a/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8.onnx +++ /dev/null @@ -1,51 +0,0 @@ - :Ì -U -A -B - a_zero_point - b_zero_pointmatmul_output_int32 MatMulInteger" MatMulInteger -. -a_scale -b_scale -multiplier mul_right"Mul -A -matmul_output_int32matmul_output_floatcast"Cast* -to -  -5 -matmul_output_float - -multiplierY -mul_bottom"MulDynamicQuantizeMatMul_fusionZ -A - - -M -KZ -B - - -K -NZ -a_scale - - - -Z -b_scale -  - -CZ - a_zero_point - - -Z - b_zero_point -  -Cb -Y - - - -M -NB \ No newline at end of file diff --git a/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8_bias.onnx b/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8_bias.onnx deleted file mode 100644 index 76bf3f698fcee..0000000000000 --- a/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8_bias.onnx +++ /dev/null @@ -1,49 +0,0 @@ - :Ä -9 -A -Bmatmul_output_int32 MatMulInteger" MatMulInteger -. -a_scale -b_scale -multiplier mul_right"Mul -A -matmul_output_int32matmul_output_floatcast"Cast* -to -  -E -matmul_output_float - -multipliermul_bottom_output -mul_bottom"Mul -& -mul_bottom_output -biasYadd"AddDynamicQuantizeMatMul_fusionZ -A - - -M -KZ -B - - -K -NZ -a_scale - - - -Z -b_scale -  - -CZ -bias -  - -Nb -Y - - - -M -NB \ No newline at end of file