diff --git a/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc b/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc
index 51d9a57b5e447..d1883815c1a6f 100644
--- a/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc
+++ b/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc
@@ -103,7 +103,6 @@ void TestMatMulIntegerToFloat(const std::vector<int64_t>& A_dims,
   } else {
     test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCpuExecutionProvider});
   }
-
 }
 
 template
@@ -113,43 +112,43 @@ void RunMatMulIntegerToFloatTest(const string& model_path) {
   std::vector<int64_t> Y_dims{4, 128};
 
   TestMatMulIntegerToFloat(
-    A_dims,
-    B_dims,
-    model_path,
-    false, /*is_matrix_b_constant*/
-    false, /*per_column*/
-    HasZeroPoint, /*has_zp*/
-    HasBias /*has_bias*/
+      A_dims,
+      B_dims,
+      model_path,
+      false, /*is_matrix_b_constant*/
+      false, /*per_column*/
+      HasZeroPoint, /*has_zp*/
+      HasBias /*has_bias*/
   );
 
   TestMatMulIntegerToFloat(
-    A_dims,
-    B_dims,
-    model_path,
-    true, /*is_matrix_b_constant*/
-    false, /*per_column*/
-    HasZeroPoint, /*has_zp*/
-    HasBias /*has_bias*/
+      A_dims,
+      B_dims,
+      model_path,
+      true, /*is_matrix_b_constant*/
+      false, /*per_column*/
+      HasZeroPoint, /*has_zp*/
+      HasBias /*has_bias*/
   );
 
   TestMatMulIntegerToFloat(
-    A_dims,
-    B_dims,
-    model_path,
-    false, /*is_matrix_b_constant*/
-    true, /*per_column*/
-    HasZeroPoint, /*has_zp*/
-    HasBias /*has_bias*/
+      A_dims,
+      B_dims,
+      model_path,
+      false, /*is_matrix_b_constant*/
+      true, /*per_column*/
+      HasZeroPoint, /*has_zp*/
+      HasBias /*has_bias*/
   );
 
   TestMatMulIntegerToFloat(
-    A_dims,
-    B_dims,
-    model_path,
-    true, /*is_matrix_b_constant*/
-    true, /*per_column*/
-    HasZeroPoint, /*has_zp*/
-    HasBias /*has_bias*/
+      A_dims,
+      B_dims,
+      model_path,
+      true, /*is_matrix_b_constant*/
+      true, /*per_column*/
+      HasZeroPoint, /*has_zp*/
+      HasBias /*has_bias*/
   );
 }
 
@@ -171,7 +170,7 @@ TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8S8_FP16) {
 TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_S8S8_FP16) {
   RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float16_int8_int8_bias.onnx");
 }
-#endif // USE_DML
+#endif  // USE_DML
 
 TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8X8) {
   RunMatMulIntegerToFloatTest("testdata/matmul_integer_to_float_int8.onnx");
diff --git a/onnxruntime/test/contrib_ops/quantize_attention_op_test.cc b/onnxruntime/test/contrib_ops/quantize_attention_op_test.cc
index b14c16b675cc8..90397be306b23 100644
--- a/onnxruntime/test/contrib_ops/quantize_attention_op_test.cc
+++ b/onnxruntime/test/contrib_ops/quantize_attention_op_test.cc
@@ -114,7 +114,7 @@ void RunQAttention(const std::vector<float>& input_data,
     execution_providers.push_back(DefaultCpuExecutionProvider());
   } else if constexpr (ep == EP::DML) {
     execution_providers.push_back(DefaultDmlExecutionProvider());
-  } else{  // onednn ep
+  } else {  // onednn ep
     execution_providers.push_back(DefaultDnnlExecutionProvider());
   }
 
@@ -322,8 +322,8 @@ static void RunQAttentionAll(
                     batch_size, sequence_length, hidden_size, number_of_heads,
                     use_special_quantize_parameter, is_unidirectional, input_hidden_size);
   RunQAttentionDML(input_data, weight_data, bias_data, mask_index_data, output_data,
-                     batch_size, sequence_length, hidden_size, number_of_heads,
-                     use_special_quantize_parameter, is_unidirectional, input_hidden_size);
+                   batch_size, sequence_length, hidden_size, number_of_heads,
+                   use_special_quantize_parameter, is_unidirectional, input_hidden_size);
 }
 
 // ONEDNN EP only supports 2D raw mask
diff --git a/onnxruntime/test/optimizer/graph_transform_test.cc b/onnxruntime/test/optimizer/graph_transform_test.cc
index ca8807fb1628a..9418ac18d5974 100755
--- a/onnxruntime/test/optimizer/graph_transform_test.cc
+++ b/onnxruntime/test/optimizer/graph_transform_test.cc
@@ -5661,7 +5661,7 @@ TEST_F(GraphTransformationTests, MatMulIntegerToFloatTest) {
 }
 
 #ifdef USE_DML
-  TEST_F(GraphTransformationTests, MatMulIntegerToFloat16Test) {
+TEST_F(GraphTransformationTests, MatMulIntegerToFloat16Test) {
   constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/matmul_integer_to_float16_int8.onnx";
   std::shared_ptr<Model> p_model;
   ASSERT_STATUS_OK(Model::Load(model_uri, p_model, nullptr, *logger_));
@@ -5669,7 +5669,7 @@ TEST_F(GraphTransformationTests, MatMulIntegerToFloatTest) {
 
   for (auto& node : graph.Nodes()) {
     node.SetExecutionProviderType(kDmlExecutionProvider);
-    }
+  }
   onnxruntime::GraphTransformerManager graph_transformation_mgr{5};
   ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::make_unique<MatMulIntegerToFloatFusion>(), TransformerLevel::Level2));
   ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level2, *logger_));
diff --git a/onnxruntime/test/testdata/matmul_integer_to_float.py b/onnxruntime/test/testdata/matmul_integer_to_float.py
index 206a8514253c5..ac91877a0ea44 100644
--- a/onnxruntime/test/testdata/matmul_integer_to_float.py
+++ b/onnxruntime/test/testdata/matmul_integer_to_float.py
@@ -13,7 +13,13 @@ def GenerateModel(model_name, sign_i, sign_w, output_type_fp16, has_zp=True, bia
             "MatMulInteger",
         ),
         helper.make_node("Mul", ["a_scale", "b_scale"], ["multiplier"], "mul_right"),
-        helper.make_node("Cast", ["matmul_output_int32"], ["matmul_output_float"], "cast", to=TensorProto.FLOAT16 if output_type_fp16 else TensorProto.FLOAT),
+        helper.make_node(
+            "Cast",
+            ["matmul_output_int32"],
+            ["matmul_output_float"],
+            "cast",
+            to=TensorProto.FLOAT16 if output_type_fp16 else TensorProto.FLOAT,
+        ),
         helper.make_node(
             "Mul",
             ["matmul_output_float", "multiplier"],
@@ -48,14 +54,22 @@ def GenerateModel(model_name, sign_i, sign_w, output_type_fp16, has_zp=True, bia
 
     if bias:
         nodes.extend([helper.make_node("Add", ["mul_bottom_output", "bias"], ["Y"], "add")])
-        inputs.extend([helper.make_tensor_value_info("bias", TensorProto.FLOAT16 if output_type_fp16 else TensorProto.FLOAT, ["N"])])
+        inputs.extend(
+            [
+                helper.make_tensor_value_info(
+                    "bias", TensorProto.FLOAT16 if output_type_fp16 else TensorProto.FLOAT, ["N"]
+                )
+            ]
+        )
 
     graph = helper.make_graph(
         nodes,
         "DynamicQuantizeMatMul_fusion",  # name
         inputs,
         [  # outputs
-            helper.make_tensor_value_info("Y", TensorProto.FLOAT16 if output_type_fp16 else TensorProto.FLOAT, ["M", "N"]),
+            helper.make_tensor_value_info(
+                "Y", TensorProto.FLOAT16 if output_type_fp16 else TensorProto.FLOAT, ["M", "N"]
+            ),
         ],
     )
 
@@ -66,16 +80,58 @@ def GenerateModel(model_name, sign_i, sign_w, output_type_fp16, has_zp=True, bia
 if __name__ == "__main__":
     GenerateModel("matmul_integer_to_float16_int8.onnx", sign_i=False, sign_w=True, output_type_fp16=True)
     GenerateModel("matmul_integer_to_float16_uint8.onnx", sign_i=False, sign_w=False, output_type_fp16=True)
-    GenerateModel("matmul_integer_to_float16_int8_bias.onnx", sign_i=False, sign_w=True, output_type_fp16=True, has_zp=False, bias=True)
-    GenerateModel("matmul_integer_to_float16_uint8_bias.onnx", sign_i=False, sign_w=False, output_type_fp16=True, has_zp=False, bias=True)
+    GenerateModel(
+        "matmul_integer_to_float16_int8_bias.onnx",
+        sign_i=False,
+        sign_w=True,
+        output_type_fp16=True,
+        has_zp=False,
+        bias=True,
+    )
+    GenerateModel(
+        "matmul_integer_to_float16_uint8_bias.onnx",
+        sign_i=False,
+        sign_w=False,
+        output_type_fp16=True,
+        has_zp=False,
+        bias=True,
+    )
     GenerateModel("matmul_integer_to_float16_int8_int8.onnx", sign_i=True, sign_w=True, output_type_fp16=True)
-    GenerateModel("matmul_integer_to_float16_int8_int8_bias.onnx", sign_i=True, sign_w=True, output_type_fp16=True, has_zp=False, bias=True)
+    GenerateModel(
+        "matmul_integer_to_float16_int8_int8_bias.onnx",
+        sign_i=True,
+        sign_w=True,
+        output_type_fp16=True,
+        has_zp=False,
+        bias=True,
+    )
     GenerateModel("matmul_integer_to_float_int8.onnx", sign_i=False, sign_w=True, output_type_fp16=False)
     GenerateModel("matmul_integer_to_float_uint8.onnx", sign_i=False, sign_w=False, output_type_fp16=False)
-    GenerateModel("matmul_integer_to_float_int8_bias.onnx", sign_i=False, sign_w=True, output_type_fp16=False, has_zp=False, bias=True)
-    GenerateModel("matmul_integer_to_float_uint8_bias.onnx", sign_i=False, sign_w=False, output_type_fp16=False, has_zp=False, bias=True)
+    GenerateModel(
+        "matmul_integer_to_float_int8_bias.onnx",
+        sign_i=False,
+        sign_w=True,
+        output_type_fp16=False,
+        has_zp=False,
+        bias=True,
+    )
+    GenerateModel(
+        "matmul_integer_to_float_uint8_bias.onnx",
+        sign_i=False,
+        sign_w=False,
+        output_type_fp16=False,
+        has_zp=False,
+        bias=True,
+    )
     GenerateModel("matmul_integer_to_float_int8_int8.onnx", sign_i=True, sign_w=True, output_type_fp16=False)
-    GenerateModel("matmul_integer_to_float_int8_int8_bias.onnx", sign_i=True, sign_w=True, output_type_fp16=False, has_zp=False, bias=True)
+    GenerateModel(
+        "matmul_integer_to_float_int8_int8_bias.onnx",
+        sign_i=True,
+        sign_w=True,
+        output_type_fp16=False,
+        has_zp=False,
+        bias=True,
+    )
 
 
 
diff --git a/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float.py b/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float.py
index 60bdd92dc9c93..018e5fb332dd0 100644
--- a/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float.py
+++ b/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float.py
@@ -104,4 +104,4 @@ def GenerateModel(model_name):  # noqa: N802
 
 
 if __name__ == "__main__":
-    GenerateModel("matmul_integer_to_float.onnx")
\ No newline at end of file
+    GenerateModel("matmul_integer_to_float.onnx")