diff --git a/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc b/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc index b2e17c5333319..dc466c933c6d7 100644 --- a/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc +++ b/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc @@ -24,7 +24,12 @@ namespace onnxruntime { namespace test { template -static void CalculateMatMulIntegerToFloat(const int64_t M, const int64_t N, const int64_t K, const std::vector& A_data, const std::vector& A_scale, const std::vector& A_zero_point, const std::vector& B_data, std::vector& B_scale, std::vector& B_zero_point, const std::vector& Bias, std::vector& Y_data, bool per_column, bool has_zp, bool has_bias) { +static void CalculateMatMulIntegerToFloat(const int64_t M, const int64_t N, const int64_t K, + const std::vector& A_data, const std::vector& A_scale, + const std::vector& A_zero_point, const std::vector& B_data, + std::vector& B_scale, std::vector& B_zero_point, + const std::vector& Bias, std::vector& Y_data, + bool per_column, bool has_zp, bool has_bias) { if (!per_column) { B_zero_point.resize(N, B_zero_point[0]); B_scale.resize(N, B_scale[0]); @@ -34,8 +39,10 @@ static void CalculateMatMulIntegerToFloat(const int64_t M, const int64_t N, cons for (int64_t n = 0; n < N; n++) { float sum = 0.0f; for (int64_t k = 0; k < K; k++) { - float A_dequantized = has_zp ? (A_data[m * K + k] - A_zero_point[0]) * A_scale[0] : A_data[m * K + k] * A_scale[0]; - float B_dequantized = has_zp ? (B_data[k * N + n] - B_zero_point[n]) * B_scale[n] : B_data[k * N + n] * B_scale[n]; + float A_dequantized = has_zp ? + (A_data[m * K + k] - A_zero_point[0]) * A_scale[0] : A_data[m * K + k] * A_scale[0]; + float B_dequantized = has_zp ? + (B_data[k * N + n] - B_zero_point[n]) * B_scale[n] : B_data[k * N + n] * B_scale[n]; sum += A_dequantized * B_dequantized; } @@ -116,7 +123,9 @@ void TestMatMulIntegerToFloat(bool is_matrix_b_constant, } std::vector Y_data(M * N); - CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, B_data, B_scale, B_zero_point, Bias, Y_data, per_column, has_zp, has_bias); + CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, + B_data, B_scale, B_zero_point, Bias, Y_data, + per_column, has_zp, has_bias); if (constexpr(std::is_same_v)) { test.AddOutput("Y", {M, N}, Y_data); @@ -258,7 +267,9 @@ TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_U8U8) { test.AddInput("b_zero_point", {1}, B_zero_point); std::vector Y_data(M * N); - CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, B_data, B_scale, B_zero_point, {}, Y_data, false, true, false); + CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, + B_data, B_scale, B_zero_point, {}, Y_data, + false, true, false); test.AddOutput("Y", {M, N}, ToFloat16(Y_data)); std::vector> execution_providers; @@ -290,7 +301,9 @@ TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_U8S8) { test.AddInput("b_zero_point", {1}, B_zero_point); std::vector Y_data(M * N); - CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, B_data, B_scale, B_zero_point, {}, Y_data, false, true, false); + CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, + B_data, B_scale, B_zero_point, {}, Y_data, + false, true, false); test.AddOutput("Y", {M, N}, ToFloat16(Y_data)); @@ -324,7 +337,9 @@ TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_S8S8) { test.AddInput("bias", {N}, Bias); std::vector Y_data(M * N); - CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, B_data, B_scale, B_zero_point, Bias, Y_data, false, true, true); + CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, + B_data, B_scale, B_zero_point, Bias, Y_data, + false, true, true); test.AddOutput("Y", {M, N}, ToFloat16(Y_data)); @@ -358,7 +373,9 @@ TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_S8U8) { test.AddInput("bias", {N}, Bias); std::vector Y_data(M * N); - CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, B_data, B_scale, B_zero_point, Bias, Y_data, false, true, true); + CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, + B_data, B_scale, B_zero_point, Bias, Y_data, + false, true, true); test.AddOutput("Y", {M, N}, ToFloat16(Y_data)); @@ -396,7 +413,9 @@ TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16) { test.AddInput("bias", {N}, Bias); std::vector Y_data(M * N); - CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, B_data, B_scale, B_zero_point, Bias, Y_data, true, true, true); + CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, + B_data, B_scale, B_zero_point, Bias, Y_data, + true, true, true); test.AddOutput("Y", {M, N}, ToFloat16(Y_data)); test.SetOutputRelErr("Y", 2e-2f); diff --git a/onnxruntime/test/testdata/matmul_integer_to_float.py b/onnxruntime/test/testdata/matmul_integer_to_float.py index 0c9ee3f3e6492..e6c51009018f9 100644 --- a/onnxruntime/test/testdata/matmul_integer_to_float.py +++ b/onnxruntime/test/testdata/matmul_integer_to_float.py @@ -78,7 +78,7 @@ def GenerateModel(model_name, sign_i, sign_w, output_type_fp16, has_zp=True, bia if __name__ == "__main__": - # GenerateModel("matmul_integer_to_float16_int8.onnx", sign_i=False, sign_w=True, output_type_fp16=True) + GenerateModel("matmul_integer_to_float16_int8.onnx", sign_i=False, sign_w=True, output_type_fp16=True) GenerateModel("matmul_integer_to_float_int8.onnx", sign_i=False, sign_w=True, output_type_fp16=False) GenerateModel("matmul_integer_to_float_uint8.onnx", sign_i=False, sign_w=False, output_type_fp16=False) GenerateModel( diff --git a/onnxruntime/test/testdata/matmul_integer_to_float16_int8.onnx b/onnxruntime/test/testdata/matmul_integer_to_float16_int8.onnx new file mode 100644 index 0000000000000..22293b0d10756 --- /dev/null +++ b/onnxruntime/test/testdata/matmul_integer_to_float16_int8.onnx @@ -0,0 +1,51 @@ + :Ì +U +A +B + a_zero_point + b_zero_pointmatmul_output_int32 MatMulInteger" MatMulInteger +. +a_scale +b_scale +multiplier mul_right"Mul +A +matmul_output_int32matmul_output_floatcast"Cast* +to +  +5 +matmul_output_float + +multiplierY +mul_bottom"MulDynamicQuantizeMatMul_fusionZ +A + + +M +KZ +B + + +K +NZ +a_scale + + + +Z +b_scale +  + +CZ + a_zero_point + + +Z + b_zero_point +  +Cb +Y + + + +M +NB \ No newline at end of file