diff --git a/onnxruntime/test/testdata/matmul_integer_to_float16_int8.onnx b/onnxruntime/test/testdata/matmul_integer_to_float16_int8.onnx new file mode 100644 index 0000000000000..22293b0d10756 --- /dev/null +++ b/onnxruntime/test/testdata/matmul_integer_to_float16_int8.onnx @@ -0,0 +1,51 @@ + : +U +A +B + a_zero_point + b_zero_pointmatmul_output_int32 MatMulInteger" MatMulInteger +. +a_scale +b_scale +multiplier mul_right"Mul +A +matmul_output_int32matmul_output_floatcast"Cast* +to + +5 +matmul_output_float + +multiplierY +mul_bottom"MulDynamicQuantizeMatMul_fusionZ +A + + +M +KZ +B + + +K +NZ +a_scale + + + +Z +b_scale +  + +CZ + a_zero_point + + +Z + b_zero_point +  +Cb +Y + + + +M +NB \ No newline at end of file diff --git a/onnxruntime/test/testdata/matmul_integer_to_float16_int8_bias.onnx b/onnxruntime/test/testdata/matmul_integer_to_float16_int8_bias.onnx new file mode 100644 index 0000000000000..b92648e6ac23c --- /dev/null +++ b/onnxruntime/test/testdata/matmul_integer_to_float16_int8_bias.onnx @@ -0,0 +1,49 @@ + : +9 +A +Bmatmul_output_int32 MatMulInteger" MatMulInteger +. +a_scale +b_scale +multiplier mul_right"Mul +A +matmul_output_int32matmul_output_floatcast"Cast* +to + +E +matmul_output_float + +multipliermul_bottom_output +mul_bottom"Mul +& +mul_bottom_output +biasYadd"AddDynamicQuantizeMatMul_fusionZ +A + + +M +KZ +B + + +K +NZ +a_scale + + + +Z +b_scale +  + +CZ +bias +  + +Nb +Y + + + +M +NB \ No newline at end of file diff --git a/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8.onnx b/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8.onnx new file mode 100644 index 0000000000000..3bb5129ba0800 --- /dev/null +++ b/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8.onnx @@ -0,0 +1,51 @@ + : +U +A +B + a_zero_point + b_zero_pointmatmul_output_int32 MatMulInteger" MatMulInteger +. +a_scale +b_scale +multiplier mul_right"Mul +A +matmul_output_int32matmul_output_floatcast"Cast* +to + +5 +matmul_output_float + +multiplierY +mul_bottom"MulDynamicQuantizeMatMul_fusionZ +A + + +M +KZ +B + + +K +NZ +a_scale + + + +Z +b_scale +  + +CZ + a_zero_point + + +Z + b_zero_point +  +Cb +Y + + + +M +NB \ No newline at end of file diff --git a/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8_bias.onnx b/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8_bias.onnx new file mode 100644 index 0000000000000..76bf3f698fcee --- /dev/null +++ b/onnxruntime/test/testdata/matmul_integer_to_float16_int8_int8_bias.onnx @@ -0,0 +1,49 @@ + : +9 +A +Bmatmul_output_int32 MatMulInteger" MatMulInteger +. +a_scale +b_scale +multiplier mul_right"Mul +A +matmul_output_int32matmul_output_floatcast"Cast* +to + +E +matmul_output_float + +multipliermul_bottom_output +mul_bottom"Mul +& +mul_bottom_output +biasYadd"AddDynamicQuantizeMatMul_fusionZ +A + + +M +KZ +B + + +K +NZ +a_scale + + + +Z +b_scale +  + +CZ +bias +  + +Nb +Y + + + +M +NB \ No newline at end of file diff --git a/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float16.onnx b/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float16.onnx new file mode 100644 index 0000000000000..67d50eac6f74a --- /dev/null +++ b/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float16.onnx @@ -0,0 +1,90 @@ + : +Q +input a_quantizeda_scalea_zpDynamicQuantizeLinear"DynamicQuantizeLinear +a + a_quantized + b_quantized_1 +a_zp +b_zp_1matmul_output_int32_1MatMulInteger_1" MatMulInteger +4 +a_scale + b_scale_1 multiplier_1 mul_right_1"Mul +G +matmul_output_int32_1matmul_output_float_1cast_1"Cast* +to +F +matmul_output_float_1 + multiplier_1 mul_output_1 mul_bottom_1"Mul +1 + mul_output_1 +bias_1output_1 +bias_add_1"Add +a + a_quantized + b_quantized_2 +a_zp +b_zp_2matmul_output_int32_2MatMulInteger_2" MatMulInteger +4 +a_scale + b_scale_2 multiplier_2 mul_right_2"Mul +G +matmul_output_int32_2matmul_output_float_2cast_2"Cast* +to +F +matmul_output_float_2 + multiplier_2 mul_output_2 mul_bottom_2"Mul +1 + mul_output_2 +bias_2output_2 +bias_add_2"Add +a + a_quantized + b_quantized_3 +a_zp +b_zp_3matmul_output_int32_3MatMulInteger_3" MatMulInteger +4 +a_scale + b_scale_3 multiplier_3 mul_right_3"Mul +G +matmul_output_int32_3matmul_output_float_3cast_3"Cast* +to +B +matmul_output_float_3 + multiplier_3output_3 mul_bottom_3"MulMatMulIntegerToFloat_fusion**B b_quantized_1**Bb_zp_1* +*~B b_scale_1**B b_quantized_3**Bb_zp_3* +*~B b_scale_3* +* Bbias_1** +*xBbias_2Z +input +  + + +Z + b_quantized_2 +  + +Z +b_zp_2 + + +Z + b_scale_2 + + + +b +output_1 +  + + +b +output_2 +  + + +b +output_3 +  + + +B \ No newline at end of file