diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorMatMulIntegerToFloat.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorMatMulIntegerToFloat.cpp index ba0ecb9d7af69..b5a3dd0960b86 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorMatMulIntegerToFloat.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorMatMulIntegerToFloat.cpp @@ -19,7 +19,7 @@ class DmlOperatorMatMulIntegerToFloat : public DmlOperator ortBias, ortInputCount }; - + enum DmlInputIndex : uint32_t { dmlA, @@ -51,7 +51,6 @@ class DmlOperatorMatMulIntegerToFloat : public DmlOperator // Broadcast Bias tensor to the shape of the output tensor. if(kernelInfo.IsInputValid(OrtInputTensors::ortBias)) { - m_inputTensorDescs[DmlInputIndex::dmlBias] = CreateTensorDescFromInput(kernelInfo, OrtInputTensors::ortBias, TensorAxis::DoNotCoerce, TensorAxis::W, TensorAxis::RightAligned, outputShape); } @@ -60,9 +59,9 @@ class DmlOperatorMatMulIntegerToFloat : public DmlOperator // Resize the A Scale to be the same dimension as the input tensor. // The 1D tensor needs to be moved to the H channel. m_inputTensorDescs[DmlInputIndex::dmlAScale] = CreateTensorDescFromInput( - kernelInfo, + kernelInfo, OrtInputTensors::ortAScale, - TensorAxis::DoNotCoerce, + TensorAxis::DoNotCoerce, TensorAxis::H, TensorAxis::LeftAligned, std::nullopt, @@ -73,11 +72,10 @@ class DmlOperatorMatMulIntegerToFloat : public DmlOperator // The 1D tensor needs to be moved to the H channel. if (kernelInfo.IsInputValid(OrtInputTensors::ortAZeroPoint)) { - m_inputTensorDescs[DmlInputIndex::dmlAZeroPoint] = CreateTensorDescFromInput( - kernelInfo, + kernelInfo, OrtInputTensors::ortAZeroPoint, - TensorAxis::DoNotCoerce, + TensorAxis::DoNotCoerce, TensorAxis::H, TensorAxis::LeftAligned, std::nullopt, @@ -110,4 +108,4 @@ class DmlOperatorMatMulIntegerToFloat : public DmlOperator DML_OP_DEFINE_CREATION_FUNCTION(MatMulIntegerToFloat, DmlOperatorMatMulIntegerToFloat); -} // namespace Dml \ No newline at end of file +} // namespace Dml diff --git a/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc b/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc index c7f2ec89fb817..ed1911be4cf77 100644 --- a/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc +++ b/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc @@ -39,8 +39,12 @@ static void CalculateMatMulIntegerToFloat(const int64_t M, const int64_t N, cons for (int64_t n = 0; n < N; n++) { float sum = 0.0f; for (int64_t k = 0; k < K; k++) { - float A_dequantized = has_zp ? (static_cast(A_data[m * K + k]) - static_cast(A_zero_point[0])) * A_scale[0] : A_data[m * K + k] * A_scale[0]; - float B_dequantized = has_zp ? (static_cast(B_data[k * N + n]) - static_cast(B_zero_point[n])) * B_scale[n] : B_data[k * N + n] * B_scale[n]; + float A_dequantized = has_zp ? + (static_cast(A_data[m * K + k]) - static_cast(A_zero_point[0])) * A_scale[0] : + A_data[m * K + k] * A_scale[0]; + float B_dequantized = has_zp ? + (static_cast(B_data[k * N + n]) - static_cast(B_zero_point[n])) * B_scale[n] : + B_data[k * N + n] * B_scale[n]; sum += A_dequantized * B_dequantized; } @@ -77,7 +81,8 @@ void TestMatMulIntegerToFloat(bool is_matrix_b_constant, std::vector tmp_B_data; tmp_B_data = random.Uniform(B_dims, - (constexpr(std::is_same_v)) ? std::numeric_limits::lowest() / 2 : std::numeric_limits::lowest(), + (constexpr(std::is_same_v)) ? + std::numeric_limits::lowest() / 2 :std::numeric_limits::lowest(), std::numeric_limits::max() / 2); std::transform(tmp_B_data.begin(), tmp_B_data.end(), std::back_inserter(B_data), [](int32_t v) -> WType { return static_cast(v); diff --git a/onnxruntime/test/optimizer/graph_transform_test.cc b/onnxruntime/test/optimizer/graph_transform_test.cc index 6f2fe1d43216b..83a0cfb97e206 100755 --- a/onnxruntime/test/optimizer/graph_transform_test.cc +++ b/onnxruntime/test/optimizer/graph_transform_test.cc @@ -5680,7 +5680,7 @@ TEST_F(GraphTransformationTests, MatMulIntegerToFloatTest) { } #ifdef USE_DML - TEST_F(GraphTransformationTests, MatMulIntegerToFloat16Test) { +TEST_F(GraphTransformationTests, MatMulIntegerToFloat16Test) { constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/matmul_integer_to_float16_int8.onnx"; std::shared_ptr p_model; ASSERT_STATUS_OK(Model::Load(model_uri, p_model, nullptr, *logger_)); @@ -5688,7 +5688,7 @@ TEST_F(GraphTransformationTests, MatMulIntegerToFloatTest) { for (auto& node : graph.Nodes()) { node.SetExecutionProviderType(kDmlExecutionProvider); - } + } onnxruntime::GraphTransformerManager graph_transformation_mgr{5}; ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::make_unique(), TransformerLevel::Level2)); ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level2, *logger_)); diff --git a/onnxruntime/test/testdata/matmul_integer_to_float.py b/onnxruntime/test/testdata/matmul_integer_to_float.py index 37db93a288b08..e6c51009018f9 100644 --- a/onnxruntime/test/testdata/matmul_integer_to_float.py +++ b/onnxruntime/test/testdata/matmul_integer_to_float.py @@ -13,7 +13,13 @@ def GenerateModel(model_name, sign_i, sign_w, output_type_fp16, has_zp=True, bia "MatMulInteger", ), helper.make_node("Mul", ["a_scale", "b_scale"], ["multiplier"], "mul_right"), - helper.make_node("Cast", ["matmul_output_int32"], ["matmul_output_float"], "cast", to=TensorProto.FLOAT16 if output_type_fp16 else TensorProto.FLOAT), + helper.make_node( + "Cast", + ["matmul_output_int32"], + ["matmul_output_float"], + "cast", + to=TensorProto.FLOAT16 if output_type_fp16 else TensorProto.FLOAT, + ), helper.make_node( "Mul", ["matmul_output_float", "multiplier"], @@ -48,14 +54,22 @@ def GenerateModel(model_name, sign_i, sign_w, output_type_fp16, has_zp=True, bia if bias: nodes.extend([helper.make_node("Add", ["mul_bottom_output", "bias"], ["Y"], "add")]) - inputs.extend([helper.make_tensor_value_info("bias", TensorProto.FLOAT16 if output_type_fp16 else TensorProto.FLOAT, ["N"])]) + inputs.extend( + [ + helper.make_tensor_value_info( + "bias", TensorProto.FLOAT16 if output_type_fp16 else TensorProto.FLOAT, ["N"] + ) + ] + ) graph = helper.make_graph( nodes, "DynamicQuantizeMatMul_fusion", # name inputs, [ # outputs - helper.make_tensor_value_info("Y", TensorProto.FLOAT16 if output_type_fp16 else TensorProto.FLOAT, ["M", "N"]), + helper.make_tensor_value_info( + "Y", TensorProto.FLOAT16 if output_type_fp16 else TensorProto.FLOAT, ["M", "N"] + ), ], ) @@ -67,8 +81,29 @@ def GenerateModel(model_name, sign_i, sign_w, output_type_fp16, has_zp=True, bia GenerateModel("matmul_integer_to_float16_int8.onnx", sign_i=False, sign_w=True, output_type_fp16=True) GenerateModel("matmul_integer_to_float_int8.onnx", sign_i=False, sign_w=True, output_type_fp16=False) GenerateModel("matmul_integer_to_float_uint8.onnx", sign_i=False, sign_w=False, output_type_fp16=False) - GenerateModel("matmul_integer_to_float_int8_bias.onnx", sign_i=False, sign_w=True, output_type_fp16=False, has_zp=False, bias=True) - GenerateModel("matmul_integer_to_float_uint8_bias.onnx", sign_i=False, sign_w=False, output_type_fp16=False, has_zp=False, bias=True) + GenerateModel( + "matmul_integer_to_float_int8_bias.onnx", + sign_i=False, + sign_w=True, + output_type_fp16=False, + has_zp=False, + bias=True, + ) + GenerateModel( + "matmul_integer_to_float_uint8_bias.onnx", + sign_i=False, + sign_w=False, + output_type_fp16=False, + has_zp=False, + bias=True, + ) GenerateModel("matmul_integer_to_float_int8_int8.onnx", sign_i=True, sign_w=True, output_type_fp16=False) - GenerateModel("matmul_integer_to_float_int8_int8_bias.onnx", sign_i=True, sign_w=True, output_type_fp16=False, has_zp=False, bias=True) + GenerateModel( + "matmul_integer_to_float_int8_int8_bias.onnx", + sign_i=True, + sign_w=True, + output_type_fp16=False, + has_zp=False, + bias=True, + ) diff --git a/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float.py b/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float.py index 60bdd92dc9c93..018e5fb332dd0 100644 --- a/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float.py +++ b/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float.py @@ -104,4 +104,4 @@ def GenerateModel(model_name): # noqa: N802 if __name__ == "__main__": - GenerateModel("matmul_integer_to_float.onnx") \ No newline at end of file + GenerateModel("matmul_integer_to_float.onnx")