From b9c63251156a6d24577d7614e2a8a7001092c923 Mon Sep 17 00:00:00 2001 From: raoanag <127366241+raoanag@users.noreply.github.com> Date: Wed, 22 Nov 2023 10:46:07 -0800 Subject: [PATCH] Adding matmul_integer_to_float16 onnx models (#16978) (#18552) [Cherry Pick Reviewed] ### Description Missed adding float16 onnx models generated using `matmul_integer_to_float.py` ### Motivation and Context ### Description ### Motivation and Context --- .../matmul_integer_to_float16_int8.onnx | 51 +++++++++++ .../matmul_integer_to_float16_int8_bias.onnx | 49 ++++++++++ .../matmul_integer_to_float16_int8_int8.onnx | 51 +++++++++++ ...mul_integer_to_float16_int8_int8_bias.onnx | 49 ++++++++++ .../fusion/matmul_integer_to_float16.onnx | 90 +++++++++++++++++++ 5 files changed, 290 insertions(+) create mode 100644 onnxruntime/test/testdata/matmul_integer_to_float16_int8.onnx create mode 100644 onnxruntime/test/testdata/matmul_integer_to_float16_int8_bias.onnx create mode 100644 onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8.onnx create mode 100644 onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8_bias.onnx create mode 100644 onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float16.onnx diff --git a/onnxruntime/test/testdata/matmul_integer_to_float16_int8.onnx b/onnxruntime/test/testdata/matmul_integer_to_float16_int8.onnx new file mode 100644 index 0000000000000..22293b0d10756 --- /dev/null +++ b/onnxruntime/test/testdata/matmul_integer_to_float16_int8.onnx @@ -0,0 +1,51 @@ + : +U +A +B + a_zero_point + b_zero_pointmatmul_output_int32 MatMulInteger" MatMulInteger +. +a_scale +b_scale +multiplier mul_right"Mul +A +matmul_output_int32matmul_output_floatcast"Cast* +to + +5 +matmul_output_float + +multiplierY +mul_bottom"MulDynamicQuantizeMatMul_fusionZ +A + + +M +KZ +B + + +K +NZ +a_scale + + + +Z +b_scale +  + +CZ + a_zero_point + + +Z + b_zero_point +  +Cb +Y + + + +M +NB \ No newline at end of file diff --git a/onnxruntime/test/testdata/matmul_integer_to_float16_int8_bias.onnx b/onnxruntime/test/testdata/matmul_integer_to_float16_int8_bias.onnx new file mode 100644 index 0000000000000..b92648e6ac23c --- /dev/null +++ b/onnxruntime/test/testdata/matmul_integer_to_float16_int8_bias.onnx @@ -0,0 +1,49 @@ + : +9 +A +Bmatmul_output_int32 MatMulInteger" MatMulInteger +. +a_scale +b_scale +multiplier mul_right"Mul +A +matmul_output_int32matmul_output_floatcast"Cast* +to + +E +matmul_output_float + +multipliermul_bottom_output +mul_bottom"Mul +& +mul_bottom_output +biasYadd"AddDynamicQuantizeMatMul_fusionZ +A + + +M +KZ +B + + +K +NZ +a_scale + + + +Z +b_scale +  + +CZ +bias +  + +Nb +Y + + + +M +NB \ No newline at end of file diff --git a/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8.onnx b/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8.onnx new file mode 100644 index 0000000000000..3bb5129ba0800 --- /dev/null +++ b/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8.onnx @@ -0,0 +1,51 @@ + : +U +A +B + a_zero_point + b_zero_pointmatmul_output_int32 MatMulInteger" MatMulInteger +. +a_scale +b_scale +multiplier mul_right"Mul +A +matmul_output_int32matmul_output_floatcast"Cast* +to + +5 +matmul_output_float + +multiplierY +mul_bottom"MulDynamicQuantizeMatMul_fusionZ +A + + +M +KZ +B + + +K +NZ +a_scale + + + +Z +b_scale +  + +CZ + a_zero_point + + +Z + b_zero_point +  +Cb +Y + + + +M +NB \ No newline at end of file diff --git a/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8_bias.onnx b/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8_bias.onnx new file mode 100644 index 0000000000000..76bf3f698fcee --- /dev/null +++ b/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8_bias.onnx @@ -0,0 +1,49 @@ + : +9 +A +Bmatmul_output_int32 MatMulInteger" MatMulInteger +. +a_scale +b_scale +multiplier mul_right"Mul +A +matmul_output_int32matmul_output_floatcast"Cast* +to + +E +matmul_output_float + +multipliermul_bottom_output +mul_bottom"Mul +& +mul_bottom_output +biasYadd"AddDynamicQuantizeMatMul_fusionZ +A + + +M +KZ +B + + +K +NZ +a_scale + + + +Z +b_scale +  + +CZ +bias +  + +Nb +Y + + + +M +NB \ No newline at end of file diff --git a/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float16.onnx b/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float16.onnx new file mode 100644 index 0000000000000..67d50eac6f74a --- /dev/null +++ b/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float16.onnx @@ -0,0 +1,90 @@ + : +Q +input a_quantizeda_scalea_zpDynamicQuantizeLinear"DynamicQuantizeLinear +a + a_quantized + b_quantized_1 +a_zp +b_zp_1matmul_output_int32_1MatMulInteger_1" MatMulInteger +4 +a_scale + b_scale_1 multiplier_1 mul_right_1"Mul +G +matmul_output_int32_1matmul_output_float_1cast_1"Cast* +to +F +matmul_output_float_1 + multiplier_1 mul_output_1 mul_bottom_1"Mul +1 + mul_output_1 +bias_1output_1 +bias_add_1"Add +a + a_quantized + b_quantized_2 +a_zp +b_zp_2matmul_output_int32_2MatMulInteger_2" MatMulInteger +4 +a_scale + b_scale_2 multiplier_2 mul_right_2"Mul +G +matmul_output_int32_2matmul_output_float_2cast_2"Cast* +to +F +matmul_output_float_2 + multiplier_2 mul_output_2 mul_bottom_2"Mul +1 + mul_output_2 +bias_2output_2 +bias_add_2"Add +a + a_quantized + b_quantized_3 +a_zp +b_zp_3matmul_output_int32_3MatMulInteger_3" MatMulInteger +4 +a_scale + b_scale_3 multiplier_3 mul_right_3"Mul +G +matmul_output_int32_3matmul_output_float_3cast_3"Cast* +to +B +matmul_output_float_3 + multiplier_3output_3 mul_bottom_3"MulMatMulIntegerToFloat_fusion**B b_quantized_1**Bb_zp_1* +*~B b_scale_1**B b_quantized_3**Bb_zp_3* +*~B b_scale_3* +* Bbias_1** +*xBbias_2Z +input +  + + +Z + b_quantized_2 +  + +Z +b_zp_2 + + +Z + b_scale_2 + + + +b +output_1 +  + + +b +output_2 +  + + +b +output_3 +  + + +B \ No newline at end of file