From e0cbbbfd2bd0d2a2c2195633ddd5a963b77c94ef Mon Sep 17 00:00:00 2001
From: raoanag <127366241+raoanag@users.noreply.github.com>
Date: Thu, 3 Aug 2023 10:16:22 -0700
Subject: [PATCH] Adding matmul_integer_to_float16 onnx models (#16978)

### Description
Missed adding float16 onnx models generated using
`matmul_integer_to_float.py`


### Motivation and Context
---
 .../matmul_integer_to_float16_int8.onnx       | 51 +++++++++++
 .../matmul_integer_to_float16_int8_bias.onnx  | 49 ++++++++++
 .../matmul_integer_to_float16_int8_int8.onnx  | 51 +++++++++++
 ...mul_integer_to_float16_int8_int8_bias.onnx | 49 ++++++++++
 .../fusion/matmul_integer_to_float16.onnx     | 90 +++++++++++++++++++
 5 files changed, 290 insertions(+)
 create mode 100644 onnxruntime/test/testdata/matmul_integer_to_float16_int8.onnx
 create mode 100644 onnxruntime/test/testdata/matmul_integer_to_float16_int8_bias.onnx
 create mode 100644 onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8.onnx
 create mode 100644 onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8_bias.onnx
 create mode 100644 onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float16.onnx

diff --git a/onnxruntime/test/testdata/matmul_integer_to_float16_int8.onnx b/onnxruntime/test/testdata/matmul_integer_to_float16_int8.onnx
new file mode 100644
index 0000000000000..22293b0d10756
--- /dev/null
+++ b/onnxruntime/test/testdata/matmul_integer_to_float16_int8.onnx
@@ -0,0 +1,51 @@
+	:╠
+U
+A
+B
+a_zero_point
+b_zero_pointmatmul_output_int32MatMulInteger"MatMulInteger
+.
+a_scale
+b_scale
+multiplier	mul_right"Mul
+A
+matmul_output_int32matmul_output_floatcast"Cast*	
+to
+а
+5
+matmul_output_float
+
+multiplierY
+mul_bottom"MulDynamicQuantizeMatMul_fusionZ
+A
+
+
+M
+KZ
+B
+
+
+K
+NZ
+a_scale
+
+
+
+Z
+b_scale
+	
+
+CZ
+a_zero_point
+
+
+Z
+b_zero_point
+	
+Cb
+Y
+
+
+
+M
+NB
\ No newline at end of file
diff --git a/onnxruntime/test/testdata/matmul_integer_to_float16_int8_bias.onnx b/onnxruntime/test/testdata/matmul_integer_to_float16_int8_bias.onnx
new file mode 100644
index 0000000000000..b92648e6ac23c
--- /dev/null
+++ b/onnxruntime/test/testdata/matmul_integer_to_float16_int8_bias.onnx
@@ -0,0 +1,49 @@
+	:─
+9
+A
+Bmatmul_output_int32MatMulInteger"MatMulInteger
+.
+a_scale
+b_scale
+multiplier	mul_right"Mul
+A
+matmul_output_int32matmul_output_floatcast"Cast*	
+to
+а
+E
+matmul_output_float
+
+multipliermul_bottom_output
+mul_bottom"Mul
+&
+mul_bottom_output
+biasYadd"AddDynamicQuantizeMatMul_fusionZ
+A
+
+
+M
+KZ
+B
+
+
+K
+NZ
+a_scale
+
+
+
+Z
+b_scale
+	
+
+CZ
+bias
+	
+
+Nb
+Y
+
+
+
+M
+NB
\ No newline at end of file
diff --git a/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8.onnx b/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8.onnx
new file mode 100644
index 0000000000000..3bb5129ba0800
--- /dev/null
+++ b/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8.onnx
@@ -0,0 +1,51 @@
+	:╠
+U
+A
+B
+a_zero_point
+b_zero_pointmatmul_output_int32MatMulInteger"MatMulInteger
+.
+a_scale
+b_scale
+multiplier	mul_right"Mul
+A
+matmul_output_int32matmul_output_floatcast"Cast*	
+to
+а
+5
+matmul_output_float
+
+multiplierY
+mul_bottom"MulDynamicQuantizeMatMul_fusionZ
+A
+
+
+M
+KZ
+B
+
+
+K
+NZ
+a_scale
+
+
+
+Z
+b_scale
+	
+
+CZ
+a_zero_point
+
+
+Z
+b_zero_point
+	
+Cb
+Y
+
+
+
+M
+NB
\ No newline at end of file
diff --git a/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8_bias.onnx b/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8_bias.onnx
new file mode 100644
index 0000000000000..76bf3f698fcee
--- /dev/null
+++ b/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8_bias.onnx
@@ -0,0 +1,49 @@
+	:─
+9
+A
+Bmatmul_output_int32MatMulInteger"MatMulInteger
+.
+a_scale
+b_scale
+multiplier	mul_right"Mul
+A
+matmul_output_int32matmul_output_floatcast"Cast*	
+to
+а
+E
+matmul_output_float
+
+multipliermul_bottom_output
+mul_bottom"Mul
+&
+mul_bottom_output
+biasYadd"AddDynamicQuantizeMatMul_fusionZ
+A
+
+
+M
+KZ
+B
+
+
+K
+NZ
+a_scale
+
+
+
+Z
+b_scale
+	
+
+CZ
+bias
+	
+
+Nb
+Y
+
+
+
+M
+NB
\ No newline at end of file
diff --git a/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float16.onnx b/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float16.onnx
new file mode 100644
index 0000000000000..67d50eac6f74a
--- /dev/null
+++ b/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float16.onnx
@@ -0,0 +1,90 @@
+	:╓
+Q
+inputa_quantizeda_scalea_zpDynamicQuantizeLinear"DynamicQuantizeLinear
+a
+a_quantized
+b_quantized_1
+a_zp
+b_zp_1matmul_output_int32_1MatMulInteger_1"MatMulInteger
+4
+a_scale
+	b_scale_1multiplier_1mul_right_1"Mul
+G
+matmul_output_int32_1matmul_output_float_1cast_1"Cast*	
+toа
+F
+matmul_output_float_1
+multiplier_1mul_output_1mul_bottom_1"Mul
+1
+mul_output_1
+bias_1output_1
+bias_add_1"Add
+a
+a_quantized
+b_quantized_2
+a_zp
+b_zp_2matmul_output_int32_2MatMulInteger_2"MatMulInteger
+4
+a_scale
+	b_scale_2multiplier_2mul_right_2"Mul
+G
+matmul_output_int32_2matmul_output_float_2cast_2"Cast*	
+toа
+F
+matmul_output_float_2
+multiplier_2mul_output_2mul_bottom_2"Mul
+1
+mul_output_2
+bias_2output_2
+bias_add_2"Add
+a
+a_quantized
+b_quantized_3
+a_zp
+b_zp_3matmul_output_int32_3MatMulInteger_3"MatMulInteger
+4
+a_scale
+	b_scale_3multiplier_3mul_right_3"Mul
+G
+matmul_output_int32_3matmul_output_float_3cast_3"Cast*	
+toа
+B
+matmul_output_float_3
+multiplier_3output_3mul_bottom_3"MulMatMulIntegerToFloat_fusion**Bb_quantized_1**АBb_zp_1*
+*│~B	b_scale_1**Bb_quantized_3**АBb_zp_3*
+*│~B	b_scale_3*
+*	АААИАКBbias_1**
+*АxАААДАИАКАМАОАРАСBbias_2Z
+input
+
+
+
+Z
+b_quantized_2
+
+
+Z
+b_zp_2
+
+
+Z
+	b_scale_2
+
+
+
+b
+output_1
+
+
+
+b
+output_2
+
+
+
+b
+output_3
+
+
+
+B
\ No newline at end of file