diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/External/DirectMLHelpers/ApiTraits.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/External/DirectMLHelpers/ApiTraits.h
index a7a6e59e400ef..3c0f49f3d2d49 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/External/DirectMLHelpers/ApiTraits.h
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/External/DirectMLHelpers/ApiTraits.h
@@ -882,7 +882,7 @@ struct OperatorDescTraits
 template <>
 struct OperatorDescTraits<DML_MATRIX_MULTIPLY_INTEGER_TO_FLOAT_OPERATOR_DESC>
 {
-    static constexpr DML_OPERATOR_TYPE Type = (DML_OPERATOR_TYPE) DML_OPERATOR_MATRIX_MULTIPLY_INTEGER_TO_FLOAT;
+    static constexpr DML_OPERATOR_TYPE Type = DML_OPERATOR_MATRIX_MULTIPLY_INTEGER_TO_FLOAT;
 };
 
 template <>
diff --git a/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h b/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h
index 7146edd861257..d5a66a74237ee 100644
--- a/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h
+++ b/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h
@@ -826,14 +826,6 @@ class QLinearMatMulHelper : public MatMulHelperBase
     QLinearMatMulHelper(const Info_t& info, const Shape_t& shape) : MatMulHelperBase(info, shape, 0, 3) {}
 };
 
-class MatMulIntegerToFloatHelper : public MatMulHelperBase
-{
-public:
-    template <typename Info_t, typename Shape_t>
-    MatMulIntegerToFloatHelper(const Info_t& info, const Shape_t& shape) : MatMulHelperBase(info, shape, 0, 1) {}
-};
-
-
 class TopKHelper
 {
     void Initialize(
@@ -1752,7 +1744,7 @@ using ShapeInferenceHelper_Identity16 = GetOutputShapeAsInputShapeHelper;
 using ShapeInferenceHelper_MatMul = MatMulHelper;
 using ShapeInferenceHelper_MatMulInteger = MatMulHelper;
 using ShapeInferenceHelper_DynamicQuantizeMatMul = MatMulHelper;
-using ShapeInferenceHelper_MatMulIntegerToFloat = MatMulIntegerToFloatHelper;
+using ShapeInferenceHelper_MatMulIntegerToFloat = MatMulHelper;
 using ShapeInferenceHelper_QLinearMatMul = QLinearMatMulHelper;
 using ShapeInferenceHelper_QLinearAdd = GetBroadcastedOutputShapeHelper;
 using ShapeInferenceHelper_DynamicQuantizeLinear = GetOutputShapeAsInputShapeHelper;
diff --git a/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc b/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc
index 53af9ae43eaef..49560b8ff268a 100644
--- a/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc
+++ b/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc
@@ -24,69 +24,78 @@ namespace onnxruntime {
 namespace test {
 
 template <typename IType, typename WType, typename OType>
-void TestMatMulIntegerToFloat(const std::vector<int64_t>& A_dims,
-                              std::vector<int64_t> B_dims,
-                              const std::string& reference_model,
-                              bool is_matrix_b_constant,
+static void CalculateMatMulIntegerToFloat(const int64_t M, const int64_t N, const int64_t K,
+                                          const std::vector<IType>& A_data, const std::vector<OType>& A_scale,
+                                          const std::vector<IType>& A_zero_point, const std::vector<WType>& B_data,
+                                          std::vector<OType>& B_scale, std::vector<WType>& B_zero_point,
+                                          const std::vector<OType>& Bias, std::vector<float>& Y_data,
+                                          bool per_column, bool has_zp, bool has_bias) {
+  if (!per_column) {
+    B_zero_point.resize(N, B_zero_point[0]);
+    B_scale.resize(N, B_scale[0]);
+  }
+
+  for (int64_t m = 0; m < M; m++) {
+    for (int64_t n = 0; n < N; n++) {
+      float sum = 0.0f;
+      for (int64_t k = 0; k < K; k++) {
+        float A_dequantized = has_zp ? (A_data[m * K + k] - A_zero_point[0]) * static_cast<float>(A_scale[0])
+                                     : A_data[m * K + k] * static_cast<float>(A_scale[0]);
+        float B_dequantized = has_zp ? (B_data[k * N + n] - B_zero_point[n]) * static_cast<float>(B_scale[n])
+                                     : B_data[k * N + n] * static_cast<float>(B_scale[n]);
+
+        sum += A_dequantized * B_dequantized;
+      }
+      if (has_bias) {
+        sum += static_cast<float>(Bias[n]);
+      }
+      Y_data[m * N + n] = static_cast<float>(sum);
+    }
+  }
+}
+
+template <typename IType, typename WType, typename OType>
+void TestMatMulIntegerToFloat(bool is_matrix_b_constant,
                               bool per_column = false,
                               bool has_zp = true,
                               bool has_bias = false) {
   // create rand inputs
-  RandomValueGenerator random{2502124740};
-  per_column = reference_model.length() < 0;
-
+  RandomValueGenerator random{};
+  int64_t M = 4;
+  int64_t N = 128;
+  int64_t K = 128;
+  std::vector<int64_t> A_dims{M, K};
+  std::vector<int64_t> B_dims{K, N};
+  std::vector<int64_t> Y_dims{M, N};
   std::vector<IType> A_data;
   std::vector<int32_t> tmp_A_data = random.Uniform<int32_t>(A_dims,
                                                             std::numeric_limits<IType>::lowest(),
                                                             std::numeric_limits<IType>::max());
   std::transform(tmp_A_data.begin(), tmp_A_data.end(), std::back_inserter(A_data), [](int32_t v) -> IType {
-    //v = 1;
     return static_cast<IType>(v);
   });
 
   std::vector<WType> B_data;
-//#if defined(USE_DML)
-//  std::vector<int32_t> tmp_B_data = random.Uniform<int32_t>(B_dims,
-//                                                            (std::is_same_v<WType, int8_t> ? -2 : 1),
-//                                                            5);
-//#else
   std::vector<int32_t> tmp_B_data = random.Uniform<int32_t>(B_dims,
                                                             std::numeric_limits<WType>::lowest(),
                                                             std::numeric_limits<WType>::max());
-//#endif
   std::transform(tmp_B_data.begin(), tmp_B_data.end(), std::back_inserter(B_data), [](int32_t v) -> WType {
-    //v = 1;
     return static_cast<WType>(v);
   });
 
-  //std::vector<float> A_scale = random.Uniform<float>(AsSpan<int64_t>({1}), -0.1f, 0.1f);
-  std::vector<float> A_scale(1, static_cast<float>(1.0f));
+  std::vector<float> A_scale = random.Uniform<float>(AsSpan<int64_t>({1}), -0.1f, 0.1f);
   std::vector<IType> A_zero_point{(std::numeric_limits<IType>::lowest() + std::numeric_limits<IType>::max() + IType(2)) / 2};
 
   int64_t b_scale_zp_size = per_column ? B_dims.back() : 1;
-  //int64_t b_scale_zp_size = B_dims.back();
-  std::vector<float> B_scale = random.Uniform<float>(AsSpan({b_scale_zp_size}), static_cast<float>(-0.1f), static_cast<float>(0.1f));
-  //std::vector<float> B_scale(b_scale_zp_size, static_cast<float>(1.0f));
-
-  //std::vector<WType> B_zero_point(b_scale_zp_size, 1);
+  std::vector<float> B_scale = random.Uniform<float>(AsSpan({b_scale_zp_size}), -0.1f, 0.1f);
   std::vector<WType> B_zero_point(b_scale_zp_size);
-  if (has_zp) {
-    std::for_each(B_zero_point.begin(),
-                  B_zero_point.end(),
-                  [&random](WType& zp) {
-                    zp = static_cast<WType>(random.Uniform<int32_t>(std::array<int64_t, 1>{1},
-                                                                    std::numeric_limits<WType>::lowest(),
-                                                                    std::numeric_limits<WType>::max() / 2)[0]);
-                  });
-  } else {
-    B_zero_point = {0};
-  }
+  std::for_each(B_zero_point.begin(),
+                B_zero_point.end(),
+                [&random](WType& zp) {
+                  zp = static_cast<WType>(random.Uniform<int32_t>(std::array<int64_t, 1>{1},
+                                                                  std::numeric_limits<WType>::lowest(),
+                                                                  std::numeric_limits<WType>::max())[0]);
+                });
 
-  //std::vector<float> Bias = random.Uniform<float>(AsSpan<int64_t>({B_dims.back()}), -0.1f, 0.1f);
-  std::vector<float> Bias(B_dims.back(), static_cast<float>(0.0f));
+  std::vector<float> Bias = random.Uniform<float>(AsSpan<int64_t>({B_dims.back()}), -0.1f, 0.1f);
 
   OpTester test("MatMulIntegerToFloat", 1, onnxruntime::kMSDomain);
   test.AddInput<IType>("A", A_dims, A_data);
@@ -107,84 +116,33 @@ void TestMatMulIntegerToFloat(const std::vector<int64_t>& A_dims,
   } else {
     test.AddOptionalInputEdge<float>();
   }
-  int64_t M = 10;
-  int64_t N = 10;
-  int64_t K = 10;
-  std::vector<OType> expected_vals(M * N);
-
-  //if constexpr (std::is_same_v<OType, float>)
-  //{
-  for (int64_t m = 0; m < M; m++) {
-    for (int64_t n = 0; n < N; n++) {
-      float sum = 0.0f;
-      for (int64_t k = 0; k < K; k++) {
-        float AIntermediate = has_zp ? (A_data[m * K + k] - A_zero_point[0]) : A_data[m * K + k];
-        float BIntermediate = has_zp ? (B_data[k * N + n] - B_zero_point[0]) : B_data[k * N + n];
-        sum += (AIntermediate * A_scale[0]) * (BIntermediate * B_scale[0]);
-      }
-      if (has_bias) {
-        // sum += Bias[m * N + n];
-        sum += Bias[n];
-      }
-      expected_vals[m * N + n] = static_cast<OType>(sum);
-    }
-  }
-  if constexpr (std::is_same_v<OType, float>) {
-    test.AddOutput<float>("Y", {M, N}, expected_vals);
+
+  std::vector<float> Y_data(M * N);
+  CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, B_data, B_scale, B_zero_point, Bias, Y_data, per_column, has_zp, has_bias);
+
+  if constexpr (std::is_same_v<OType, float>) {
+    test.AddOutput<float>("Y", {M, N}, Y_data);
   } else {
-    test.AddOutput<MLFloat16>("Y", {M, N}, ToFloat16(expected_vals));
+    test.AddOutput<MLFloat16>("Y", {M, N}, ToFloat16(Y_data));
+    test.SetOutputAbsErr("Y", 0.5f);
   }
-  //} else {
-  //  MLFloat16 AZP = static_cast<MLFloat16>(A_zero_point[0]);
-  //  MLFloat16 BZP = static_cast<MLFloat16>(B_zero_point[0]);
-  //  for (int64_t m = 0; m < M; m++) {
-  //    for (int64_t n = 0; n < N; n++) {
-  //      MLFloat16 sum = static_cast<MLFloat16>(0.0f);
-  //      for (int64_t k = 0; k < K; k++) {
-  //        MLFloat16 AIntermediate = (has_zp ? (A_data[m * K + k] - AZP) : A_data[m * K + k]);
-  //        MLFloat16 BIntermediate = (has_zp ? (B_data[k * N + n] - BZP) : B_data[k * N + n]);
-  //        sum += (AIntermediate * A_scale[0]) * (BIntermediate * B_scale[0]);
-  //      }
-  //      if (has_bias) {
-  //        // sum += Bias[m * N + n];
-  //        sum += static_cast<MLFloat16>(Bias[n]);
-  //      }
-  //      expected_vals[m * N + n] = static_cast<OType>(sum);
-  //    }
-  //  }
-  //  test.AddOutput<MLFloat16>("Y", {M, N}, expected_vals);
-  //}
-
-  //test.AddReferenceOutputs(reference_model);
-//#if defined(USE_DML)
-//  if constexpr (std::is_same_v<OType, float>) {
-//    test.SetOutputRelErr("Y", 2e-2f);
-//  } else {
-//    //test.SetOutputRelErr("Y", 1.0f);
-//    test.SetOutputAbsErr("Y", 0.5f);
-//    //test.SetOutputRelErr("Y", 2e-2f);
-//  }
-//#else
-//  test.SetOutputRelErr("Y", 1e-4f);
-//#endif
-
-  if (std::is_same_v<IType, uint8_t> && std::is_same_v<WType, uint8_t> && std::is_same_v<OType, float>) {
+
+  // Only DML EP supports these data type combinations for now
+  if (std::is_same_v<OType, MLFloat16> ||
+      (std::is_same_v<IType, int8_t> &&
+       /*std::is_same_v<OType, float> &&*/ !std::is_same_v<WType, int8_t>)) {
+    std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+    execution_providers.push_back(DefaultDmlExecutionProvider());
+    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
+  } else {
     test.Run();
-  } else {
-    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCpuExecutionProvider});
   }
 }
 
 template <typename IType, typename WType, typename OType, bool HasZeroPoint, bool HasBias>
-void RunMatMulIntegerToFloatTest(const string& model_path) {
-  std::vector<int64_t> A_dims{10, 10};
-  std::vector<int64_t> B_dims{10, 10};
-  std::vector<int64_t> Y_dims{10, 10};
-
+void RunMatMulIntegerToFloatTest() {
   TestMatMulIntegerToFloat<IType, WType, OType>(
-      A_dims,
-      B_dims,
-      model_path,
       false,        /*is_matrix_b_constant*/
       false,        /*per_column*/
       HasZeroPoint, /*has_zp*/
       HasBias       /*has_bias*/
   );
 
@@ -192,9 +150,6 @@ void RunMatMulIntegerToFloatTest(const string& model_path) {
   );
 
   TestMatMulIntegerToFloat<IType, WType, OType>(
-      A_dims,
-      B_dims,
-      model_path,
       true,         /*is_matrix_b_constant*/
       false,        /*per_column*/
       HasZeroPoint, /*has_zp*/
@@ -202,9 +157,6 @@ void RunMatMulIntegerToFloatTest(const string& model_path) {
   );
 
   TestMatMulIntegerToFloat<IType, WType, OType>(
-      A_dims,
-      B_dims,
-      model_path,
       false,        /*is_matrix_b_constant*/
       true,         /*per_column*/
       HasZeroPoint, /*has_zp*/
@@ -212,9 +164,6 @@ void RunMatMulIntegerToFloatTest(const string& model_path) {
   );
 
   TestMatMulIntegerToFloat<IType, WType, OType>(
-      A_dims,
-      B_dims,
-      model_path,
       true,         /*is_matrix_b_constant*/
       true,         /*per_column*/
       HasZeroPoint, /*has_zp*/
@@ -222,198 +171,247 @@ void RunMatMulIntegerToFloatTest(const string& model_path) {
   );
 }
 
-#if USE_DML
-//TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8X8_FP16) {
-//  RunMatMulIntegerToFloatTest<uint8_t, int8_t, MLFloat16, true, false>("testdata/matmul_integer_to_float16_int8.onnx");
-//  RunMatMulIntegerToFloatTest<uint8_t, uint8_t, MLFloat16, true, false>("testdata/matmul_integer_to_float16_uint8.onnx");
-//}
-//
-//TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_U8X8_FP16) {
-//  RunMatMulIntegerToFloatTest<uint8_t, uint8_t, MLFloat16, false, false>("testdata/matmul_integer_to_float16_uint8.onnx");
-//}
-//
-//TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8X8_FP16) {
-//  RunMatMulIntegerToFloatTest<uint8_t, int8_t, MLFloat16, false, true>("testdata/matmul_integer_to_float16_int8_bias.onnx");
-//  RunMatMulIntegerToFloatTest<uint8_t, uint8_t, MLFloat16, false, true>("testdata/matmul_integer_to_float16_uint8_bias.onnx");
-//}
-//
-//TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8X8_FP16) {
-//  RunMatMulIntegerToFloatTest<uint8_t, uint8_t, MLFloat16, true, true>("testdata/matmul_integer_to_float16_uint8_bias.onnx");
-//}
-//
-//TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8S8_FP16) {
-//  RunMatMulIntegerToFloatTest<int8_t, int8_t, MLFloat16, true, false>("testdata/matmul_integer_to_float16_int8_int8.onnx");
-//}
-//
-//TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_S8S8_FP16) {
-//  RunMatMulIntegerToFloatTest<int8_t, int8_t, MLFloat16, false, true>("testdata/matmul_integer_to_float16_int8_int8_bias.onnx");
-//}
-//
-//TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_S8S8_FP16) {
-//  RunMatMulIntegerToFloatTest<int8_t, int8_t, MLFloat16, true, true>("testdata/matmul_integer_to_float16_int8_int8_bias.onnx");
-//}
-//
-//TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_S8S8_FP16) {
-//  RunMatMulIntegerToFloatTest<int8_t, int8_t, MLFloat16, false, false>("testdata/matmul_integer_to_float16_int8_int8.onnx");
-//}
-#endif  // USE_DML
-
-#if USE_DML
-
-TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8S8_FP16) {
-  RunMatMulIntegerToFloatTest<uint8_t, int8_t, MLFloat16, true, false>("testdata/matmul_integer_to_float16_int8.onnx");
+TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8S8) {
+  RunMatMulIntegerToFloatTest<int8_t, int8_t, float, true, false>();
 }
 
-TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8S8_FP16) {
-  RunMatMulIntegerToFloatTest<uint8_t, int8_t, MLFloat16, false, true>("testdata/matmul_integer_to_float16_int8_bias.onnx");
+TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_S8S8) {
+  RunMatMulIntegerToFloatTest<int8_t, int8_t, float, false, true>();
 }
 
-TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_U8S8_FP16) {
-  RunMatMulIntegerToFloatTest<uint8_t, int8_t, MLFloat16, false, false>("testdata/matmul_integer_to_float16_uint8.onnx");
+TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_S8S8) {
+  RunMatMulIntegerToFloatTest<int8_t, int8_t, float, false, false>();
 }
 
-TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8S8_FP16) {
-  RunMatMulIntegerToFloatTest<uint8_t, int8_t, MLFloat16, true, true>("testdata/matmul_integer_to_float16_uint8_bias.onnx");
+TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_S8S8) {
+  RunMatMulIntegerToFloatTest<int8_t, int8_t, float, true, true>();
 }
 
-TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8S8_FP16) {
-  RunMatMulIntegerToFloatTest<int8_t, int8_t, MLFloat16, true, false>("testdata/matmul_integer_to_float16_int8_int8.onnx");
+TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8U8) {
+  RunMatMulIntegerToFloatTest<uint8_t, uint8_t, float, true, false>();
 }
 
-TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8U8_FP16) {
-  RunMatMulIntegerToFloatTest<int8_t, uint8_t, MLFloat16, true, false>("testdata/matmul_integer_to_float16_int8_int8.onnx");
-}
-TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_S8S8_FP16) {
-  RunMatMulIntegerToFloatTest<int8_t, int8_t, MLFloat16, false, true>("testdata/matmul_integer_to_float16_int8_int8_bias.onnx");
+TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8U8) {
+  RunMatMulIntegerToFloatTest<uint8_t, uint8_t, float, false, true>();
 }
 
-TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_S8U8_FP16) {
-  RunMatMulIntegerToFloatTest<int8_t, uint8_t, MLFloat16, false, true>("testdata/matmul_integer_to_float16_int8_int8_bias.onnx");
+TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_U8U8) {
+  RunMatMulIntegerToFloatTest<uint8_t, uint8_t, float, false, false>();
 }
 
-TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_S8S8_FP16) {
-  RunMatMulIntegerToFloatTest<int8_t, int8_t, MLFloat16, false, false>("testdata/matmul_integer_to_float16_int8_int8.onnx");
-}
-TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_S8U8_FP16) {
-  RunMatMulIntegerToFloatTest<int8_t, uint8_t, MLFloat16, false, false>("testdata/matmul_integer_to_float16_int8_int8.onnx");
+TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8X8) {
+  RunMatMulIntegerToFloatTest<uint8_t, uint8_t, float, true, true>();
 }
 
-TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_S8S8_FP16) {
-  RunMatMulIntegerToFloatTest<int8_t, int8_t, MLFloat16, true, true>("testdata/matmul_integer_to_float16_int8_int8_bias.onnx");
-}
+// DML EP supports a Float16 output type, and A and B matrices of different data types for Float32 output
+#if defined(USE_DML)
 
-TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_S8U8_FP16) {
-  RunMatMulIntegerToFloatTest<int8_t, uint8_t, MLFloat16, true, true>("testdata/matmul_integer_to_float16_int8_int8_bias.onnx");
+TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8U8) {
+  RunMatMulIntegerToFloatTest<int8_t, uint8_t, float, true, false>();
 }
 
-#endif
-
-TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8U8_FP16) {
-  RunMatMulIntegerToFloatTest<uint8_t, uint8_t, MLFloat16, true, false>("testdata/matmul_integer_to_float16_uint8.onnx");
+TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_S8U8) {
+  RunMatMulIntegerToFloatTest<int8_t, uint8_t, float, false, true>();
 }
 
-TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8U8_FP16) {
-  RunMatMulIntegerToFloatTest<uint8_t, uint8_t, MLFloat16, false, true>("testdata/matmul_integer_to_float16_uint8_bias.onnx");
+TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_S8U8) {
+  RunMatMulIntegerToFloatTest<int8_t, uint8_t, float, false, false>();
 }
 
-TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_U8U8_FP16) {
-  RunMatMulIntegerToFloatTest<uint8_t, uint8_t, MLFloat16, false, false>("testdata/matmul_integer_to_float16_uint8.onnx");
+TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_S8U8) {
+  RunMatMulIntegerToFloatTest<int8_t, uint8_t, float, true, true>();
 }
 
-TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8X8_FP16) {
-  RunMatMulIntegerToFloatTest<uint8_t, uint8_t, MLFloat16, true, true>("testdata/matmul_integer_to_float16_uint8_bias.onnx");
+TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8S8) {
+  RunMatMulIntegerToFloatTest<uint8_t, int8_t, float, true, false>();
 }
+TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8S8) {
+  RunMatMulIntegerToFloatTest<uint8_t, int8_t, float, false, true>();
+}
+TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_U8S8) {
+  RunMatMulIntegerToFloatTest<uint8_t, int8_t, float, false, false>();
+}
+TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8S8) {
+  RunMatMulIntegerToFloatTest<uint8_t, int8_t, float, true, true>();
+}
+TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_U8U8) {
+  OpTester test("MatMulIntegerToFloat", 1, kMSDomain);
+  int64_t M = 5;
+  int64_t N = 5;
+  int64_t K = 2;
+
+  std::vector<uint8_t> A_data = {1, 5, 2, 1, 9,
+                                 1, 1, 3, 7, 2};
+  std::vector<uint8_t> B_data = {3, 7, 2, 1, 1,
+                                 2, 1, 9, 1, 1};
+  std::vector<MLFloat16> A_scale = ToFloat16({3.0f});
+  std::vector<MLFloat16> B_scale = ToFloat16({2.0f});
+  test.AddInput<uint8_t>("A", {M, K}, A_data);
+  test.AddInput<uint8_t>("B", {K, N}, B_data);
+  std::vector<uint8_t> A_zero_point = {3};
+  std::vector<uint8_t> B_zero_point = {5};
+
+  test.AddInput<MLFloat16>("a_scale", {1}, A_scale);
+  test.AddInput<MLFloat16>("b_scale", {1}, B_scale);
+  test.AddInput<uint8_t>("a_zero_point", {1}, A_zero_point);
+  test.AddInput<uint8_t>("b_zero_point", {1}, B_zero_point);
+
+  std::vector<float> Y_data(M * N);
+  CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, B_data, B_scale, B_zero_point, {}, Y_data, false, true, false);
+
+  test.AddOutput<MLFloat16>("Y", {M, N}, ToFloat16(Y_data));
+
+  std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+  execution_providers.push_back(DefaultDmlExecutionProvider());
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
+}
+
+TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_U8S8) {
+  OpTester test("MatMulIntegerToFloat", 1, kMSDomain);
+  int64_t M = 5;
+  int64_t N = 5;
+  int64_t K = 2;
+
+  std::vector<uint8_t> A_data = {3, 7, 2, 1, 1,
+                                 2, 1, 9, 1, 1};
+  std::vector<int8_t> B_data = {2, -1, -9, 1, 1,
+                                -1, 0, -3, 1, -4};
+  std::vector<MLFloat16> A_scale = ToFloat16({-4.0f});
+  std::vector<MLFloat16> B_scale = ToFloat16({2.0f});
test.AddInput("A", {M, K}, A_data); + test.AddInput("B", {K, N}, B_data); + std::vector A_zero_point = {1}; + std::vector B_zero_point = {3}; + std::vector Bias = ToFloat16({11.0f, -17.0f, 1.0f, -3.0f, 12.0f}); + test.AddInput("a_scale", {1}, A_scale); + test.AddInput("b_scale", {1}, B_scale); + test.AddInput("a_zero_point", {1}, A_zero_point); + test.AddInput("b_zero_point", {1}, B_zero_point); + std::vector Y_data(M * N); + CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, B_data, B_scale, B_zero_point, {}, Y_data, false, true, false); + test.AddOutput("Y", {M, N}, ToFloat16(Y_data)); + std::vector> execution_providers; + execution_providers.push_back(DefaultDmlExecutionProvider()); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); +} +TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_S8S8) { + OpTester test("MatMulIntegerToFloat", 1, kMSDomain); + int64_t M = 5; + int64_t N = 5; + int64_t K = 2; + std::vector A_data = {3, 7, -2, 1, 1, + 2, -1, -9, 1, 1}; + std::vector B_data = {2, -1, -9, 1, 1, + -1, 0, -3, 1, -4}; + std::vector A_scale = ToFloat16({-4.0f}); + std::vector B_scale = ToFloat16({2.0f}); + test.AddInput("A", {M, K}, A_data); + test.AddInput("B", {K, N}, B_data); + std::vector A_zero_point = {-1}; + std::vector B_zero_point = {3}; + std::vector Bias = ToFloat16({11.0f, -17.0f, 1.0f, -3.0f, 12.0f}); + test.AddInput("a_scale", {1}, A_scale); + test.AddInput("b_scale", {1}, B_scale); + test.AddInput("a_zero_point", {1}, A_zero_point); + test.AddInput("b_zero_point", {1}, B_zero_point); + test.AddInput("bias", {N}, Bias); -#if USE_DML - -TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8S8) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8.onnx"); -} - -TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8S8) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8_bias.onnx"); -} - -TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_U8S8) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_uint8.onnx"); -} + std::vector Y_data(M * N); + CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, B_data, B_scale, B_zero_point, Bias, Y_data, false, true, true); -TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8S8) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_uint8_bias.onnx"); -} + test.AddOutput("Y", {M, N}, ToFloat16(Y_data)); -TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8S8) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8_int8.onnx"); + std::vector> execution_providers; + execution_providers.push_back(DefaultDmlExecutionProvider()); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); } -TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8U8) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8_int8.onnx"); -} +TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_S8U8) { + OpTester test("MatMulIntegerToFloat", 1, kMSDomain); + int64_t M = 5; + int64_t N = 5; + int64_t K = 2; -TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_S8S8) { - RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8_int8_bias.onnx"); -} + std::vector A_data = {3, 7, -2, 1, 1, + 2, -1, -9, 1, 1}; + std::vector B_data = {3, 7, 2, 1, 1, + 2, 1, 9, 1, 1}; + std::vector A_scale = ToFloat16({-4.0f}); + std::vector B_scale = ToFloat16({2.0f}); + test.AddInput("A", {M, K}, A_data); + test.AddInput("B", {K, N}, B_data); + std::vector A_zero_point = {-1}; + 
+  std::vector<uint8_t> B_zero_point = {3};
+  std::vector<MLFloat16> Bias = ToFloat16({11.0f, -17.0f, 1.0f, -3.0f, 12.0f});
 
-TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_S8U8) {
-  RunMatMulIntegerToFloatTest<int8_t, uint8_t, float, false, true>("testdata/matmul_integer_to_float_int8_int8_bias.onnx");
-}
+  test.AddInput<MLFloat16>("a_scale", {1}, A_scale);
+  test.AddInput<MLFloat16>("b_scale", {1}, B_scale);
+  test.AddInput<int8_t>("a_zero_point", {1}, A_zero_point);
+  test.AddInput<uint8_t>("b_zero_point", {1}, B_zero_point);
+  test.AddInput<MLFloat16>("bias", {N}, Bias);
 
-TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_S8S8) {
-  RunMatMulIntegerToFloatTest<int8_t, int8_t, float, false, false>("testdata/matmul_integer_to_float_int8_int8.onnx");
-}
+  std::vector<float> Y_data(M * N);
+  CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, B_data, B_scale, B_zero_point, Bias, Y_data, false, true, true);
 
-TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_S8U8) {
-  RunMatMulIntegerToFloatTest<int8_t, uint8_t, float, false, false>("testdata/matmul_integer_to_float_int8_int8.onnx");
-}
+  test.AddOutput<MLFloat16>("Y", {M, N}, ToFloat16(Y_data));
 
-TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_S8S8) {
-  RunMatMulIntegerToFloatTest<int8_t, int8_t, float, true, true>("testdata/matmul_integer_to_float_int8_int8_bias.onnx");
+  std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+  execution_providers.push_back(DefaultDmlExecutionProvider());
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
 }
 
-TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_S8U8) {
-  RunMatMulIntegerToFloatTest<int8_t, uint8_t, float, true, true>("testdata/matmul_integer_to_float_int8_int8_bias.onnx");
-}
+TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16) {
+  OpTester test("MatMulIntegerToFloat", 1, kMSDomain);
+  int64_t M = 2;
+  int64_t N = 2;
+  int64_t K = 3;
 
-#endif
+  std::vector<int8_t> A_data = {11, -2, 5,
+                                -1, 3, 10};
+  std::vector<int8_t> B_data = {-13, -2,
+                                9, 55,
+                                -1, 23};
+  std::vector<MLFloat16> A_scale = ToFloat16({0.910f});
+  std::vector<MLFloat16> B_scale = ToFloat16({1.10f, 1.123f});
 
-TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8U8) {
-  RunMatMulIntegerToFloatTest<uint8_t, uint8_t, float, true, false>("testdata/matmul_integer_to_float_uint8.onnx");
-}
+  std::vector<int8_t> A_zero_point = {113};
+  std::vector<int8_t> B_zero_point = {98, 71};
+  std::vector<MLFloat16> Bias = ToFloat16({0.10f, 1.123f});
 
-TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8U8) {
-  RunMatMulIntegerToFloatTest<uint8_t, uint8_t, float, false, true>("testdata/matmul_integer_to_float_uint8_bias.onnx");
-}
+  test.AddInput<int8_t>("A", {M, K}, A_data);
+  test.AddInput<int8_t>("B", {K, N}, B_data);
 
-TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_U8U8) {
-  RunMatMulIntegerToFloatTest<uint8_t, uint8_t, float, false, false>("testdata/matmul_integer_to_float_uint8.onnx");
-}
+  test.AddInput<MLFloat16>("a_scale", {}, A_scale);
+  test.AddInput<MLFloat16>("b_scale", {N}, B_scale);
+  test.AddInput<int8_t>("a_zero_point", {}, A_zero_point);
+  test.AddInput<int8_t>("b_zero_point", {N}, B_zero_point);
+  test.AddInput<MLFloat16>("bias", {N}, Bias);
+
+  std::vector<float> Y_data(M * N);
+  CalculateMatMulIntegerToFloat(M, N, K, A_data, A_scale, A_zero_point, B_data, B_scale, B_zero_point, Bias, Y_data, true, true, true);
 
-TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8X8) {
-  RunMatMulIntegerToFloatTest<uint8_t, uint8_t, float, true, true>("testdata/matmul_integer_to_float_uint8_bias.onnx");
+  test.AddOutput<MLFloat16>("Y", {M, N}, ToFloat16(Y_data));
+  test.SetOutputRelErr("Y", 2e-2f);
+
+  std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+  execution_providers.push_back(DefaultDmlExecutionProvider());
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
 }
+#endif
 
 TEST(MatMulIntegerToFloat, MatMulInteger_With_ZeroPoint) {
   auto test_case = [&](const std::vector<int64_t>& input_shape,
@@ -478,242 +476,5 @@ TEST(MatMulIntegerToFloat, MatMulInteger_With_ZeroPoint) {
   test_case({15, 14, 13}, {15, 13, 27}, {15, 1, 27});
 }
 
-TEST(MatMulIntegerToFloat, CustomMatMul) {
-  OpTester test("MatMulIntegerToFloat", 1, kMSDomain);
-  int64_t M = 2;
-  int64_t N = 2;
-  int64_t K = 2;
-
-  std::vector<uint8_t> AMatrix = {1, 1,
-                                  1, 1};
-  std::vector<uint8_t> BMatrix = {1, 1,
-                                  1, 1};
-  test.AddInput<uint8_t>("A", {M, K}, AMatrix);
-  test.AddInput<uint8_t>("B", {N, K}, BMatrix);
-
-  test.AddInput<float>("a_scale", {}, {1.0f});
-  test.AddInput<float>("b_scale", {}, {1.0f});
-  //test.AddInput<uint8_t>("a_zero_point", {}, {113});
-
-  std::vector<float> expected_vals(M * N);
-  for (int64_t m = 0; m < M; m++) {
-    for (int64_t n = 0; n < N; n++) {
-      float sum = 0.0f;
-      for (int64_t k = 0; k < K; k++) {
-        sum += AMatrix[m * K + k] * BMatrix[k * N + n];
-      }
-      expected_vals[m * N + n] = sum;
-    }
-  }
-
-  test.AddOutput<float>("Y", {M, N}, expected_vals);
-
-  test.Run();
-}
-
-TEST(MatMulIntegerToFloat, CustomZPMatMul) {
-  OpTester test("MatMulIntegerToFloat", 1, kMSDomain);
-  int64_t M = 2;
-  int64_t N = 2;
-  int64_t K = 2;
-
-  std::vector<uint8_t> AMatrix = {1, 1,
-                                  1, 1};
-  std::vector<int8_t> BMatrix = {1, -1,
-                                 1, 1};
-  float AScale = 1.0f;
-  float BScale = 1.0f;
-
-  uint8_t AZP = 113;
-  int8_t BZP = -16;
-
-  test.AddInput<uint8_t>("A", {M, K}, AMatrix);
-  test.AddInput<int8_t>("B", {N, K}, BMatrix);
-
-  test.AddInput<float>("a_scale", {}, {AScale});
-  test.AddInput<float>("b_scale", {}, {BScale});
-  test.AddInput<uint8_t>("a_zero_point", {}, {AZP});
-  test.AddInput<int8_t>("b_zero_point", {}, {BZP});
-
-  std::vector<float> expected_vals(M * N);
-  for (int64_t m = 0; m < M; m++) {
-    for (int64_t n = 0; n < N; n++) {
-      float sum = 0.0f;
-      for (int64_t k = 0; k < K; k++) {
-        sum += ((AMatrix[m * K + k] - AZP) * AScale) * ((BMatrix[k * N + n] - BZP) * BScale);
-      }
-      expected_vals[m * N + n] = sum;
-    }
-  }
-
-  test.AddOutput<float>("Y", {M, N}, expected_vals);
-
-  test.Run();
-}
-
-TEST(MatMulIntegerToFloat, CustomScaleMatMul) {
-  OpTester test("MatMulIntegerToFloat", 1, kMSDomain);
-  int64_t M = 2;
-  int64_t N = 2;
-  int64_t K = 2;
-
-  std::vector<uint8_t> AMatrix = {1, 1,
-                                  1, 1};
-  std::vector<uint8_t> BMatrix = {1, 1,
-                                  1, 1};
-  float AScale = 0.910f;
-  float BScale = 1.10f;
-
-  uint8_t AZP = 1;
-  uint8_t BZP = 1;
-
-  test.AddInput<uint8_t>("A", {M, K}, AMatrix);
-  test.AddInput<uint8_t>("B", {N, K}, BMatrix);
-
-  test.AddInput<float>("a_scale", {}, {AScale});
-  test.AddInput<float>("b_scale", {}, {BScale});
-  test.AddInput<uint8_t>("a_zero_point", {}, {AZP});
-  test.AddInput<uint8_t>("b_zero_point", {}, {BZP});
-
-  std::vector<float> expected_vals(M * N);
-  for (int64_t m = 0; m < M; m++) {
-    for (int64_t n = 0; n < N; n++) {
-      float sum = 0.0f;
-      for (int64_t k = 0; k < K; k++) {
-        sum += ((AMatrix[m * K + k] - AZP) * AScale) * ((BMatrix[k * N + n] - BZP) * BScale);
-      }
-      expected_vals[m * N + n] = sum;
-    }
-  }
-
-  test.AddOutput<float>("Y", {M, N}, expected_vals);
-
-  test.Run();
-}
-
-TEST(MatMulIntegerToFloat, CustomMatMul1) {
-  OpTester test("MatMulIntegerToFloat", 1, kMSDomain);
-  int64_t M = 2;
-  int64_t N = 2;
-  int64_t K = 2;
-
-  std::vector<int8_t> AMatrix = {11, -2,
-                                 -1, 3};
-  std::vector<int8_t> BMatrix = {-13, -2,
-                                 -1, 23};
-  float AScale = 0.910f;
-  float BScale = 1.10f;
-
-  int8_t AZP = 113;
-  int8_t BZP = 98;
-
-  test.AddInput<int8_t>("A", {M, K}, AMatrix);
-  test.AddInput<int8_t>("B", {N, K}, BMatrix);
-
-  test.AddInput<float>("a_scale", {}, {AScale});
-  test.AddInput<float>("b_scale", {}, {BScale});
-  test.AddInput<int8_t>("a_zero_point", {}, {AZP});
-  test.AddInput<int8_t>("b_zero_point", {}, {BZP});
-
-  std::vector<float> expected_vals(M * N);
-  for (int64_t m = 0; m < M; m++) {
-    for (int64_t n = 0; n < N; n++) {
-      float sum = 0.0f;
-      for (int64_t k = 0; k < K; k++) {
-        sum += ((AMatrix[m * K + k] - AZP) * AScale) * ((BMatrix[k * N + n] - BZP) * BScale);
-      }
-      expected_vals[m * N + n] = sum;
-    }
-  }
-
test.AddOutput("Y", {M, N}, expected_vals); - - test.Run(); -} - -TEST(MatMulIntegerToFloat, CustomMatMul2) { - OpTester test("MatMulIntegerToFloat", 1, kMSDomain); - int64_t M = 2; - int64_t N = 2; - int64_t K = 2; - - std::vector AMatrix = {11, -2, - -1, 3}; - std::vector BMatrix = {-13, -2, - -1, 23}; - float AScale = 0.910f; - std::vector BScale = {1.10f, 1.123f}; - - int8_t AZP = 113; - std::vector BZP = {98, 71}; - - test.AddInput("A", {M, K}, AMatrix); - test.AddInput("B", {K, N}, BMatrix); - - test.AddInput("a_scale", {}, {AScale}); - test.AddInput("b_scale", {N}, BScale); - test.AddInput("a_zero_point", {}, {AZP}); - test.AddInput("b_zero_point", {N}, BZP); - - std::vector expected_vals(M * N); - for (int64_t m = 0; m < M; m++) { - for (int64_t n = 0; n < N; n++) { - float sum = 0.0f; - for (int64_t k = 0; k < K; k++) { - sum += ((AMatrix[m * K + k] - AZP) * AScale) * ((BMatrix[k * N + n] - BZP[n]) * BScale[n]); - } - expected_vals[m * N + n] = sum; - } - } - - test.AddOutput("Y", {M, N}, expected_vals); - - test.Run(); -} - -TEST(MatMulIntegerToFloat, CustomBiasMatMul) { - OpTester test("MatMulIntegerToFloat", 1, kMSDomain); - int64_t M = 2; - int64_t N = 2; - int64_t K = 3; - - std::vector AMatrix = {11, -2, 5, - -1, 3, 10}; - std::vector BMatrix = {-13, -2, - 9, 55, - -1, 23}; - float AScale = 0.910f; - std::vector BScale = {1.10f, 1.123f}; - - int8_t AZP = 113; - std::vector BZP = {98, 71}; - - std::vector Bias = {0.10f, 1.123f}; - - test.AddInput("A", {M, K}, AMatrix); - test.AddInput("B", {K, N}, BMatrix); - - test.AddInput("a_scale", {}, {AScale}); - test.AddInput("b_scale", {N}, BScale); - test.AddInput("a_zero_point", {}, {AZP}); - test.AddInput("b_zero_point", {N}, BZP); - test.AddInput("bias", {N}, Bias); - - std::vector expected_vals(M * N); - for (int64_t m = 0; m < M; m++) { - for (int64_t n = 0; n < N; n++) { - float sum = 0.0f; - for (int64_t k = 0; k < K; k++) { - sum += ((AMatrix[m * K + k] - AZP) * AScale) * ((BMatrix[k * N + n] - BZP[n]) * BScale[n]); - } - expected_vals[m * N + n] = sum + Bias[n]; - } - } - - test.AddOutput("Y", {M, N}, expected_vals); - - test.Run(); -} - } // namespace test } // namespace onnxruntime