Skip to content

Commit

Permalink
Lint runner
Browse files: browse the repository at this point in the history
  • Loading branch information
raoanag committed Feb 27, 2024
1 parent 1c74a29 commit 88f988e
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class DmlOperatorMatMulIntegerToFloat : public DmlOperator
ortBias,
ortInputCount
};

enum DmlInputIndex : uint32_t
{
dmlA,
Expand Down Expand Up @@ -51,7 +51,6 @@ class DmlOperatorMatMulIntegerToFloat : public DmlOperator

// Broadcast Bias tensor to the shape of the output tensor.
if(kernelInfo.IsInputValid(OrtInputTensors::ortBias)) {

m_inputTensorDescs[DmlInputIndex::dmlBias] = CreateTensorDescFromInput(kernelInfo, OrtInputTensors::ortBias, TensorAxis::DoNotCoerce,
TensorAxis::W, TensorAxis::RightAligned, outputShape);
}
Expand All @@ -60,9 +59,9 @@ class DmlOperatorMatMulIntegerToFloat : public DmlOperator
// Resize the A Scale to be the same dimension as the input tensor.
// The 1D tensor needs to be moved to the H channel.
m_inputTensorDescs[DmlInputIndex::dmlAScale] = CreateTensorDescFromInput(
kernelInfo,
kernelInfo,
OrtInputTensors::ortAScale,
TensorAxis::DoNotCoerce,
TensorAxis::DoNotCoerce,
TensorAxis::H,
TensorAxis::LeftAligned,
std::nullopt,
Expand All @@ -73,11 +72,10 @@ class DmlOperatorMatMulIntegerToFloat : public DmlOperator
// The 1D tensor needs to be moved to the H channel.
if (kernelInfo.IsInputValid(OrtInputTensors::ortAZeroPoint))
{

m_inputTensorDescs[DmlInputIndex::dmlAZeroPoint] = CreateTensorDescFromInput(
kernelInfo,
kernelInfo,
OrtInputTensors::ortAZeroPoint,
TensorAxis::DoNotCoerce,
TensorAxis::DoNotCoerce,
TensorAxis::H,
TensorAxis::LeftAligned,
std::nullopt,
Expand Down Expand Up @@ -110,4 +108,4 @@ class DmlOperatorMatMulIntegerToFloat : public DmlOperator

DML_OP_DEFINE_CREATION_FUNCTION(MatMulIntegerToFloat, DmlOperatorMatMulIntegerToFloat);

} // namespace Dml
} // namespace Dml
11 changes: 8 additions & 3 deletions onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,12 @@ static void CalculateMatMulIntegerToFloat(const int64_t M, const int64_t N, cons
for (int64_t n = 0; n < N; n++) {
float sum = 0.0f;
for (int64_t k = 0; k < K; k++) {
float A_dequantized = has_zp ? (static_cast<int>(A_data[m * K + k]) - static_cast<int>(A_zero_point[0])) * A_scale[0] : A_data[m * K + k] * A_scale[0];
float B_dequantized = has_zp ? (static_cast<int>(B_data[k * N + n]) - static_cast<int>(B_zero_point[n])) * B_scale[n] : B_data[k * N + n] * B_scale[n];
float A_dequantized = has_zp ?
(static_cast<int>(A_data[m * K + k]) - static_cast<int>(A_zero_point[0])) * A_scale[0] :
A_data[m * K + k] * A_scale[0];
float B_dequantized = has_zp ?
(static_cast<int>(B_data[k * N + n]) - static_cast<int>(B_zero_point[n])) * B_scale[n] :
B_data[k * N + n] * B_scale[n];

sum += A_dequantized * B_dequantized;
}
Expand Down Expand Up @@ -77,7 +81,8 @@ void TestMatMulIntegerToFloat(bool is_matrix_b_constant,

std::vector<WType> tmp_B_data;
tmp_B_data = random.Uniform<WType>(B_dims,
(constexpr(std::is_same_v<WType, int8_t>)) ? std::numeric_limits<int8_t>::lowest() / 2 : std::numeric_limits<uint8_t>::lowest(),
(constexpr(std::is_same_v<WType, int8_t>)) ?
std::numeric_limits<int8_t>::lowest() / 2 :std::numeric_limits<uint8_t>::lowest(),
std::numeric_limits<WType>::max() / 2);
std::transform(tmp_B_data.begin(), tmp_B_data.end(), std::back_inserter(B_data), [](int32_t v) -> WType {
return static_cast<WType>(v);
Expand Down
4 changes: 2 additions & 2 deletions onnxruntime/test/optimizer/graph_transform_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5681,15 +5681,15 @@ TEST_F(GraphTransformationTests, MatMulIntegerToFloatTest) {
}

#ifdef USE_DML
TEST_F(GraphTransformationTests, MatMulIntegerToFloat16Test) {
TEST_F(GraphTransformationTests, MatMulIntegerToFloat16Test) {
constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/matmul_integer_to_float16_int8.onnx";
std::shared_ptr<Model> p_model;
ASSERT_STATUS_OK(Model::Load(model_uri, p_model, nullptr, *logger_));
Graph& graph = p_model->MainGraph();

for (auto& node : graph.Nodes()) {
node.SetExecutionProviderType(kDmlExecutionProvider);
}
}
onnxruntime::GraphTransformerManager graph_transformation_mgr{5};
ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::make_unique<MatMulIntegerToFloatFusion>(), TransformerLevel::Level2));
ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level2, *logger_));
Expand Down
47 changes: 41 additions & 6 deletions onnxruntime/test/testdata/matmul_integer_to_float.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,13 @@ def GenerateModel(model_name, sign_i, sign_w, output_type_fp16, has_zp=True, bia
"MatMulInteger",
),
helper.make_node("Mul", ["a_scale", "b_scale"], ["multiplier"], "mul_right"),
helper.make_node("Cast", ["matmul_output_int32"], ["matmul_output_float"], "cast", to=TensorProto.FLOAT16 if output_type_fp16 else TensorProto.FLOAT),
helper.make_node(
"Cast",
["matmul_output_int32"],
["matmul_output_float"],
"cast",
to=TensorProto.FLOAT16 if output_type_fp16 else TensorProto.FLOAT,
),
helper.make_node(
"Mul",
["matmul_output_float", "multiplier"],
Expand Down Expand Up @@ -48,14 +54,22 @@ def GenerateModel(model_name, sign_i, sign_w, output_type_fp16, has_zp=True, bia
if bias:
nodes.extend([helper.make_node("Add", ["mul_bottom_output", "bias"], ["Y"], "add")])

inputs.extend([helper.make_tensor_value_info("bias", TensorProto.FLOAT16 if output_type_fp16 else TensorProto.FLOAT, ["N"])])
inputs.extend(
[
helper.make_tensor_value_info(
"bias", TensorProto.FLOAT16 if output_type_fp16 else TensorProto.FLOAT, ["N"]
)
]
)

graph = helper.make_graph(
nodes,
"DynamicQuantizeMatMul_fusion", # name
inputs,
[ # outputs
helper.make_tensor_value_info("Y", TensorProto.FLOAT16 if output_type_fp16 else TensorProto.FLOAT, ["M", "N"]),
helper.make_tensor_value_info(
"Y", TensorProto.FLOAT16 if output_type_fp16 else TensorProto.FLOAT, ["M", "N"]
),
],
)

Expand All @@ -67,8 +81,29 @@ def GenerateModel(model_name, sign_i, sign_w, output_type_fp16, has_zp=True, bia
GenerateModel("matmul_integer_to_float16_int8.onnx", sign_i=False, sign_w=True, output_type_fp16=True)
GenerateModel("matmul_integer_to_float_int8.onnx", sign_i=False, sign_w=True, output_type_fp16=False)
GenerateModel("matmul_integer_to_float_uint8.onnx", sign_i=False, sign_w=False, output_type_fp16=False)
GenerateModel("matmul_integer_to_float_int8_bias.onnx", sign_i=False, sign_w=True, output_type_fp16=False, has_zp=False, bias=True)
GenerateModel("matmul_integer_to_float_uint8_bias.onnx", sign_i=False, sign_w=False, output_type_fp16=False, has_zp=False, bias=True)
GenerateModel(
"matmul_integer_to_float_int8_bias.onnx",
sign_i=False,
sign_w=True,
output_type_fp16=False,
has_zp=False,
bias=True,
)
GenerateModel(
"matmul_integer_to_float_uint8_bias.onnx",
sign_i=False,
sign_w=False,
output_type_fp16=False,
has_zp=False,
bias=True,
)

GenerateModel("matmul_integer_to_float_int8_int8.onnx", sign_i=True, sign_w=True, output_type_fp16=False)
GenerateModel("matmul_integer_to_float_int8_int8_bias.onnx", sign_i=True, sign_w=True, output_type_fp16=False, has_zp=False, bias=True)
GenerateModel(
"matmul_integer_to_float_int8_int8_bias.onnx",
sign_i=True,
sign_w=True,
output_type_fp16=False,
has_zp=False,
bias=True,
)
Original file line number Diff line number Diff line change
Expand Up @@ -104,4 +104,4 @@ def GenerateModel(model_name): # noqa: N802


if __name__ == "__main__":
GenerateModel("matmul_integer_to_float.onnx")
GenerateModel("matmul_integer_to_float.onnx")

0 comments on commit 88f988e

Please sign in to comment.