Apply "lintrunner -a" to WindowsAI (#18982)
jeffbloo authored Jan 3, 2024
1 parent 6a4e9f4 commit c1116b4
Showing 5 changed files with 100 additions and 45 deletions.
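
The change is purely mechanical: `lintrunner -a` runs the repository's configured linters and writes their suggested fixes back into the touched files, which is where the indentation, brace-spacing, and line-wrapping edits below come from. As a rough illustration only — the exact linter set is defined by the repo's lintrunner configuration, which is not shown in this commit — the same command can be driven from a small Python wrapper, assuming `lintrunner` is installed and on `PATH`:

```python
# Hedged sketch (not part of this commit): run "lintrunner -a" over a checkout and
# show what it touched. Assumes lintrunner is installed and the repository is
# configured for it; the repo path is a placeholder supplied by the caller.
import subprocess
import sys


def apply_lint_fixes(repo_root: str) -> int:
    # "-a" asks lintrunner to apply the linters' suggested fixes instead of only
    # reporting them (this is the invocation named in the commit title).
    result = subprocess.run(["lintrunner", "-a"], cwd=repo_root, check=False)
    # Summarize which files changed, so the formatting-only nature of the edit is visible.
    subprocess.run(["git", "-C", repo_root, "diff", "--stat"], check=False)
    return result.returncode


if __name__ == "__main__":
    sys.exit(apply_lint_fixes(sys.argv[1] if len(sys.argv) > 1 else "."))
```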
59 changes: 29 additions & 30 deletions onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc
@@ -103,7 +103,6 @@ void TestMatMulIntegerToFloat(const std::vector<int64_t>& A_dims,
} else {
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCpuExecutionProvider});
}

}

template <typename IType, typename WType, typename OType, bool HasZeroPoint, bool HasBias>
@@ -113,43 +112,43 @@ void RunMatMulIntegerToFloatTest(const string& model_path) {
std::vector<int64_t> Y_dims{4, 128};

TestMatMulIntegerToFloat<IType, WType, OType>(
A_dims,
B_dims,
model_path,
false, /*is_matrix_b_constant*/
false, /*per_column*/
HasZeroPoint, /*has_zp*/
HasBias /*has_bias*/
A_dims,
B_dims,
model_path,
false, /*is_matrix_b_constant*/
false, /*per_column*/
HasZeroPoint, /*has_zp*/
HasBias /*has_bias*/
);

TestMatMulIntegerToFloat<IType, WType, OType>(
A_dims,
B_dims,
model_path,
true, /*is_matrix_b_constant*/
false, /*per_column*/
HasZeroPoint, /*has_zp*/
HasBias /*has_bias*/
A_dims,
B_dims,
model_path,
true, /*is_matrix_b_constant*/
false, /*per_column*/
HasZeroPoint, /*has_zp*/
HasBias /*has_bias*/
);

TestMatMulIntegerToFloat<IType, WType, OType>(
A_dims,
B_dims,
model_path,
false, /*is_matrix_b_constant*/
true, /*per_column*/
HasZeroPoint, /*has_zp*/
HasBias /*has_bias*/
A_dims,
B_dims,
model_path,
false, /*is_matrix_b_constant*/
true, /*per_column*/
HasZeroPoint, /*has_zp*/
HasBias /*has_bias*/
);

TestMatMulIntegerToFloat<IType, WType, OType>(
A_dims,
B_dims,
model_path,
true, /*is_matrix_b_constant*/
true, /*per_column*/
HasZeroPoint, /*has_zp*/
HasBias /*has_bias*/
A_dims,
B_dims,
model_path,
true, /*is_matrix_b_constant*/
true, /*per_column*/
HasZeroPoint, /*has_zp*/
HasBias /*has_bias*/
);
}

@@ -171,7 +170,7 @@ TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8S8_FP16) {
TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_S8S8_FP16) {
RunMatMulIntegerToFloatTest<int8_t, int8_t, MLFloat16, false, true>("testdata/matmul_integer_to_float16_int8_int8_bias.onnx");

[GitHub Actions / cpplint warning] onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc:171: Lines should be <= 120 characters long [whitespace/line_length] [2]
}
#endif // USE_DML
#endif // USE_DML

TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8X8) {
RunMatMulIntegerToFloatTest<uint8_t, int8_t, float, true, false>("testdata/matmul_integer_to_float_int8.onnx");
6 changes: 3 additions & 3 deletions onnxruntime/test/contrib_ops/quantize_attention_op_test.cc
@@ -114,7 +114,7 @@ void RunQAttention(const std::vector<float>& input_data,
execution_providers.push_back(DefaultCpuExecutionProvider());
} else if constexpr (ep == EP::DML) {
execution_providers.push_back(DefaultDmlExecutionProvider());
} else{ // onednn ep
} else { // onednn ep
execution_providers.push_back(DefaultDnnlExecutionProvider());
}

@@ -322,8 +322,8 @@ static void RunQAttentionAll(
batch_size, sequence_length, hidden_size, number_of_heads,
use_special_quantize_parameter, is_unidirectional, input_hidden_size);
RunQAttentionDML(input_data, weight_data, bias_data, mask_index_data, output_data,
batch_size, sequence_length, hidden_size, number_of_heads,
use_special_quantize_parameter, is_unidirectional, input_hidden_size);
batch_size, sequence_length, hidden_size, number_of_heads,
use_special_quantize_parameter, is_unidirectional, input_hidden_size);
}

// ONEDNN EP only supports 2D raw mask
4 changes: 2 additions & 2 deletions onnxruntime/test/optimizer/graph_transform_test.cc
@@ -5661,15 +5661,15 @@ TEST_F(GraphTransformationTests, MatMulIntegerToFloatTest) {
}

#ifdef USE_DML
TEST_F(GraphTransformationTests, MatMulIntegerToFloat16Test) {
TEST_F(GraphTransformationTests, MatMulIntegerToFloat16Test) {
constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/matmul_integer_to_float16_int8.onnx";
std::shared_ptr<Model> p_model;
ASSERT_STATUS_OK(Model::Load(model_uri, p_model, nullptr, *logger_));
Graph& graph = p_model->MainGraph();

for (auto& node : graph.Nodes()) {
node.SetExecutionProviderType(kDmlExecutionProvider);
}
}
onnxruntime::GraphTransformerManager graph_transformation_mgr{5};
ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::make_unique<MatMulIntegerToFloatFusion>(), TransformerLevel::Level2));

[GitHub Actions / cpplint warning] onnxruntime/test/optimizer/graph_transform_test.cc:5674: Lines should be <= 120 characters long [whitespace/line_length] [2]
ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level2, *logger_));
74 changes: 65 additions & 9 deletions onnxruntime/test/testdata/matmul_integer_to_float.py
@@ -13,7 +13,13 @@ def GenerateModel(model_name, sign_i, sign_w, output_type_fp16, has_zp=True, bia
"MatMulInteger",
),
helper.make_node("Mul", ["a_scale", "b_scale"], ["multiplier"], "mul_right"),
helper.make_node("Cast", ["matmul_output_int32"], ["matmul_output_float"], "cast", to=TensorProto.FLOAT16 if output_type_fp16 else TensorProto.FLOAT),
helper.make_node(
"Cast",
["matmul_output_int32"],
["matmul_output_float"],
"cast",
to=TensorProto.FLOAT16 if output_type_fp16 else TensorProto.FLOAT,
),
helper.make_node(
"Mul",
["matmul_output_float", "multiplier"],
@@ -48,14 +54,22 @@ def GenerateModel(model_name, sign_i, sign_w, output_type_fp16, has_zp=True, bia
if bias:
nodes.extend([helper.make_node("Add", ["mul_bottom_output", "bias"], ["Y"], "add")])

inputs.extend([helper.make_tensor_value_info("bias", TensorProto.FLOAT16 if output_type_fp16 else TensorProto.FLOAT, ["N"])])
inputs.extend(
[
helper.make_tensor_value_info(
"bias", TensorProto.FLOAT16 if output_type_fp16 else TensorProto.FLOAT, ["N"]
)
]
)

graph = helper.make_graph(
nodes,
"DynamicQuantizeMatMul_fusion", # name
inputs,
[ # outputs
helper.make_tensor_value_info("Y", TensorProto.FLOAT16 if output_type_fp16 else TensorProto.FLOAT, ["M", "N"]),
helper.make_tensor_value_info(
"Y", TensorProto.FLOAT16 if output_type_fp16 else TensorProto.FLOAT, ["M", "N"]
),
],
)

@@ -66,16 +80,58 @@
if __name__ == "__main__":
GenerateModel("matmul_integer_to_float16_int8.onnx", sign_i=False, sign_w=True, output_type_fp16=True)
GenerateModel("matmul_integer_to_float16_uint8.onnx", sign_i=False, sign_w=False, output_type_fp16=True)
GenerateModel("matmul_integer_to_float16_int8_bias.onnx", sign_i=False, sign_w=True, output_type_fp16=True, has_zp=False, bias=True)
GenerateModel("matmul_integer_to_float16_uint8_bias.onnx", sign_i=False, sign_w=False, output_type_fp16=True, has_zp=False, bias=True)
GenerateModel(
"matmul_integer_to_float16_int8_bias.onnx",
sign_i=False,
sign_w=True,
output_type_fp16=True,
has_zp=False,
bias=True,
)
GenerateModel(
"matmul_integer_to_float16_uint8_bias.onnx",
sign_i=False,
sign_w=False,
output_type_fp16=True,
has_zp=False,
bias=True,
)

GenerateModel("matmul_integer_to_float16_int8_int8.onnx", sign_i=True, sign_w=True, output_type_fp16=True)
GenerateModel("matmul_integer_to_float16_int8_int8_bias.onnx", sign_i=True, sign_w=True, output_type_fp16=True, has_zp=False, bias=True)
GenerateModel(
"matmul_integer_to_float16_int8_int8_bias.onnx",
sign_i=True,
sign_w=True,
output_type_fp16=True,
has_zp=False,
bias=True,
)

GenerateModel("matmul_integer_to_float_int8.onnx", sign_i=False, sign_w=True, output_type_fp16=False)
GenerateModel("matmul_integer_to_float_uint8.onnx", sign_i=False, sign_w=False, output_type_fp16=False)
GenerateModel("matmul_integer_to_float_int8_bias.onnx", sign_i=False, sign_w=True, output_type_fp16=False, has_zp=False, bias=True)
GenerateModel("matmul_integer_to_float_uint8_bias.onnx", sign_i=False, sign_w=False, output_type_fp16=False, has_zp=False, bias=True)
GenerateModel(
"matmul_integer_to_float_int8_bias.onnx",
sign_i=False,
sign_w=True,
output_type_fp16=False,
has_zp=False,
bias=True,
)
GenerateModel(
"matmul_integer_to_float_uint8_bias.onnx",
sign_i=False,
sign_w=False,
output_type_fp16=False,
has_zp=False,
bias=True,
)

GenerateModel("matmul_integer_to_float_int8_int8.onnx", sign_i=True, sign_w=True, output_type_fp16=False)
GenerateModel("matmul_integer_to_float_int8_int8_bias.onnx", sign_i=True, sign_w=True, output_type_fp16=False, has_zp=False, bias=True)
GenerateModel(
"matmul_integer_to_float_int8_int8_bias.onnx",
sign_i=True,
sign_w=True,
output_type_fp16=False,
has_zp=False,
bias=True,
)
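
The rewrapped calls above only reflow arguments; the graphs that matmul_integer_to_float.py emits are unchanged. For readers unfamiliar with the pattern, below is a minimal, self-contained sketch (not part of this commit) of the MatMulInteger -> Cast -> Mul subgraph these generators build. It assumes the `onnx` Python package is available; the tensor names and symbolic dimensions are illustrative only.

```python
# Hedged sketch: assemble a tiny graph in the same shape as the generators above
# (MatMulInteger producing int32, Cast to float, then scale by a_scale * b_scale)
# and run the ONNX checker over it. Names and dims here are placeholders.
from onnx import TensorProto, checker, helper

nodes = [
    helper.make_node("MatMulInteger", ["A", "B"], ["matmul_output_int32"], "matmul"),
    helper.make_node("Mul", ["a_scale", "b_scale"], ["multiplier"], "mul_right"),
    helper.make_node("Cast", ["matmul_output_int32"], ["matmul_output_float"], "cast",
                     to=TensorProto.FLOAT),
    helper.make_node("Mul", ["matmul_output_float", "multiplier"], ["Y"], "mul_bottom"),
]

inputs = [
    helper.make_tensor_value_info("A", TensorProto.UINT8, ["M", "K"]),
    helper.make_tensor_value_info("B", TensorProto.INT8, ["K", "N"]),
    helper.make_tensor_value_info("a_scale", TensorProto.FLOAT, [1]),
    helper.make_tensor_value_info("b_scale", TensorProto.FLOAT, [1]),
]
outputs = [helper.make_tensor_value_info("Y", TensorProto.FLOAT, ["M", "N"])]

graph = helper.make_graph(nodes, "DynamicQuantizeMatMul_fusion", inputs, outputs)
model = helper.make_model(graph)
checker.check_model(model)  # structural and schema validation only
print(f"checked graph with {len(graph.node)} nodes")
```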
@@ -104,4 +104,4 @@ def GenerateModel(model_name): # noqa: N802


if __name__ == "__main__":
GenerateModel("matmul_integer_to_float.onnx")
GenerateModel("matmul_integer_to_float.onnx")
