From c26d5305571d91e25d5ec1fc57197c7ea5028fb0 Mon Sep 17 00:00:00 2001
From: raoanag <127366241+raoanag@users.noreply.github.com>
Date: Fri, 3 Nov 2023 10:05:09 -0700
Subject: [PATCH] Disable MatMulIntegerToFloat transformation for FP16 on CPU
 EP (#18239)

MatMulIntegerToFloat was updated to support FP16, but the FP16 transformation
pattern contains an FP16 "Mul" node, which the CPU EP does not support
directly. For now the FP16 transformation is enabled only on the DML EP, and
all FP16 tests are disabled on CPU.

Test results without the `--use_dml` build flag:

```
onnxruntime_test_all.exe --gtest_filter="*MatMulIntegerToFloat*"
Note: Google Test filter = *MatMulIntegerToFloat*
[==========] Running 8 tests from 4 test suites.
[----------] Global test environment set-up.
[----------] 1 test from CPU_U8S8_Precision_Tests
[ RUN      ] CPU_U8S8_Precision_Tests.MatMulIntegerToFloat
[       OK ] CPU_U8S8_Precision_Tests.MatMulIntegerToFloat (181 ms)
[----------] 1 test from CPU_U8S8_Precision_Tests (181 ms total)
[----------] 1 test from GraphTransformationTests
[ RUN      ] GraphTransformationTests.MatMulIntegerToFloatTest
[       OK ] GraphTransformationTests.MatMulIntegerToFloatTest (17 ms)
[----------] 1 test from GraphTransformationTests (17 ms total)
[----------] 1 test from QDQTransformerTests
[ RUN      ] QDQTransformerTests.MatMulIntegerToFloat
[       OK ] QDQTransformerTests.MatMulIntegerToFloat (656 ms)
[----------] 1 test from QDQTransformerTests (656 ms total)
[----------] 5 tests from MatMulIntegerToFloat
[ RUN      ] MatMulIntegerToFloat.HasZeroPoint_NoBias_test_U8X8
[       OK ] MatMulIntegerToFloat.HasZeroPoint_NoBias_test_U8X8 (195 ms)
[ RUN      ] MatMulIntegerToFloat.NoZeroPoint_HasBias_test_U8X8
[       OK ] MatMulIntegerToFloat.NoZeroPoint_HasBias_test_U8X8 (206 ms)
[ RUN      ] MatMulIntegerToFloat.HasZeroPoint_NoBias_test_S8S8
[       OK ] MatMulIntegerToFloat.HasZeroPoint_NoBias_test_S8S8 (107 ms)
[ RUN      ] MatMulIntegerToFloat.NoZeroPoint_HasBias_test_S8S8
[       OK ] MatMulIntegerToFloat.NoZeroPoint_HasBias_test_S8S8 (114 ms)
[ RUN      ] MatMulIntegerToFloat.MatMulInteger_With_ZeroPoint
[       OK ] MatMulIntegerToFloat.MatMulInteger_With_ZeroPoint (227 ms)
[----------] 5 tests from MatMulIntegerToFloat (854 ms total)
[----------] Global test environment tear-down
[==========] 8 tests from 4 test suites ran. (1713 ms total)
[  PASSED  ] 8 tests.
memleakdbg: ----- No memory leaks detected -----
```

```
onnxruntime_test_all.exe --gtest_filter="GraphTransformationTests.MatMulIntegerToFloat*"
Note: Google Test filter = GraphTransformationTests.MatMulIntegerToFloat*
[==========] Running 2 tests from 1 test suite.
[----------] Global test environment set-up.
[----------] 2 tests from GraphTransformationTests
[ RUN      ] GraphTransformationTests.MatMulIntegerToFloatTest
[       OK ] GraphTransformationTests.MatMulIntegerToFloatTest (13 ms)
[ RUN      ] GraphTransformationTests.MatMulIntegerToFloat16Test
[       OK ] GraphTransformationTests.MatMulIntegerToFloat16Test (4 ms)
[----------] 2 tests from GraphTransformationTests (20 ms total)
[----------] Global test environment tear-down
[==========] 2 tests from 1 test suite ran. (22 ms total)
[  PASSED  ] 2 tests.
memleakdbg: ----- No memory leaks detected -----
```
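For reference, a minimal sketch (not this PR's actual diff) of how an EP gate
like this can be expressed inside a graph transformer's fusion check. The
helper name `CanFuseMatMulIntegerToFloat` is hypothetical;
`Node::GetExecutionProviderType()` and `kDmlExecutionProvider` are existing
ONNX Runtime APIs:

```
// Hypothetical helper illustrating the gate described above; the real
// transformer folds this check into its pattern-matching logic.
#include "core/graph/constants.h"  // onnxruntime::kDmlExecutionProvider
#include "core/graph/graph.h"      // onnxruntime::Node

bool CanFuseMatMulIntegerToFloat(const onnxruntime::Node& node) {
  const auto* type_proto = node.OutputDefs()[0]->TypeAsProto();
  if (type_proto == nullptr || !type_proto->has_tensor_type()) {
    return false;
  }
  const bool is_fp16 = type_proto->tensor_type().elem_type() ==
                       ONNX_NAMESPACE::TensorProto_DataType_FLOAT16;
  // The fused FP16 kernel only exists on the DML EP; the CPU EP has no
  // FP16 "Mul", so reject the FP16 pattern everywhere else.
  if (is_fp16 &&
      node.GetExecutionProviderType() != onnxruntime::kDmlExecutionProvider) {
    return false;
  }
  return true;
}
```

Correspondingly, the FP16 unit tests can be compiled only into DML builds
(e.g. guarded by `#ifdef USE_DML`), which matches the CPU-only runs above
exercising just the FP32 paths.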
---
 .../fusion/matmul_integer_to_float16.onnx     | 90 -------------------
 1 file changed, 90 deletions(-)
 delete mode 100644 onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float16.onnx

diff --git a/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float16.onnx b/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float16.onnx
deleted file mode 100644
index 67d50eac6f74a..0000000000000
--- a/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float16.onnx
+++ /dev/null
@@ -1,90 +0,0 @@
[90 deleted lines of serialized ONNX protobuf omitted: the MatMulIntegerToFloat_fusion test graph (a DynamicQuantizeLinear feeding three MatMulInteger/Cast/Mul branches with per-branch b_scale/b_zp initializers and Add bias nodes), which renders as unreadable binary when diffed as text]