From c26d5305571d91e25d5ec1fc57197c7ea5028fb0 Mon Sep 17 00:00:00 2001
From: raoanag <127366241+raoanag@users.noreply.github.com>
Date: Fri, 3 Nov 2023 10:05:09 -0700
Subject: [PATCH] Disable MatMulIntegerToFloat transformation for FP16 on CPU
 EP (#18239)

MatMulIntegerToFloat was updated to support FP16, but the FP16 transformation
pattern contains an FP16 "Mul" node, which the CPU EP does not support
directly. For now the FP16 transformation is enabled only on the DML EP, and
all FP16 tests are disabled on CPU.

Test results without the `--use_dml` build flag:

```
onnxruntime_test_all.exe --gtest_filter="*MatMulIntegerToFloat*"
Note: Google Test filter = *MatMulIntegerToFloat*
[==========] Running 8 tests from 4 test suites.
[----------] Global test environment set-up.
[----------] 1 test from CPU_U8S8_Precision_Tests
[ RUN      ] CPU_U8S8_Precision_Tests.MatMulIntegerToFloat
[       OK ] CPU_U8S8_Precision_Tests.MatMulIntegerToFloat (181 ms)
[----------] 1 test from CPU_U8S8_Precision_Tests (181 ms total)
[----------] 1 test from GraphTransformationTests
[ RUN      ] GraphTransformationTests.MatMulIntegerToFloatTest
[       OK ] GraphTransformationTests.MatMulIntegerToFloatTest (17 ms)
[----------] 1 test from GraphTransformationTests (17 ms total)
[----------] 1 test from QDQTransformerTests
[ RUN      ] QDQTransformerTests.MatMulIntegerToFloat
[       OK ] QDQTransformerTests.MatMulIntegerToFloat (656 ms)
[----------] 1 test from QDQTransformerTests (656 ms total)
[----------] 5 tests from MatMulIntegerToFloat
[ RUN      ] MatMulIntegerToFloat.HasZeroPoint_NoBias_test_U8X8
[       OK ] MatMulIntegerToFloat.HasZeroPoint_NoBias_test_U8X8 (195 ms)
[ RUN      ] MatMulIntegerToFloat.NoZeroPoint_HasBias_test_U8X8
[       OK ] MatMulIntegerToFloat.NoZeroPoint_HasBias_test_U8X8 (206 ms)
[ RUN      ] MatMulIntegerToFloat.HasZeroPoint_NoBias_test_S8S8
[       OK ] MatMulIntegerToFloat.HasZeroPoint_NoBias_test_S8S8 (107 ms)
[ RUN      ] MatMulIntegerToFloat.NoZeroPoint_HasBias_test_S8S8
[       OK ] MatMulIntegerToFloat.NoZeroPoint_HasBias_test_S8S8 (114 ms)
[ RUN      ] MatMulIntegerToFloat.MatMulInteger_With_ZeroPoint
[       OK ] MatMulIntegerToFloat.MatMulInteger_With_ZeroPoint (227 ms)
[----------] 5 tests from MatMulIntegerToFloat (854 ms total)
[----------] Global test environment tear-down
[==========] 8 tests from 4 test suites ran. (1713 ms total)
[  PASSED  ] 8 tests.
memleakdbg: ----- No memory leaks detected -----
```

```
onnxruntime_test_all.exe --gtest_filter="GraphTransformationTests.MatMulIntegerToFloat*"
Note: Google Test filter = GraphTransformationTests.MatMulIntegerToFloat*
[==========] Running 2 tests from 1 test suite.
[----------] Global test environment set-up.
[----------] 2 tests from GraphTransformationTests
[ RUN      ] GraphTransformationTests.MatMulIntegerToFloatTest
[       OK ] GraphTransformationTests.MatMulIntegerToFloatTest (13 ms)
[ RUN      ] GraphTransformationTests.MatMulIntegerToFloat16Test
[       OK ] GraphTransformationTests.MatMulIntegerToFloat16Test (4 ms)
[----------] 2 tests from GraphTransformationTests (20 ms total)
[----------] Global test environment tear-down
[==========] 2 tests from 1 test suite ran. (22 ms total)
[  PASSED  ] 2 tests.
memleakdbg: ----- No memory leaks detected -----
```
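For reference, a minimal sketch (not this PR's actual diff) of how an EP gate
like this can be expressed inside a graph transformer's fusion check. The
helper name `CanFuseMatMulIntegerToFloat` is hypothetical;
`Node::GetExecutionProviderType()` and `kDmlExecutionProvider` are existing
ONNX Runtime APIs:

```
// Hypothetical helper illustrating the gate described above; the real
// transformer folds this check into its pattern-matching logic.
#include "core/graph/constants.h"  // onnxruntime::kDmlExecutionProvider
#include "core/graph/graph.h"      // onnxruntime::Node

bool CanFuseMatMulIntegerToFloat(const onnxruntime::Node& node) {
  const auto* type_proto = node.OutputDefs()[0]->TypeAsProto();
  if (type_proto == nullptr || !type_proto->has_tensor_type()) {
    return false;
  }
  const bool is_fp16 = type_proto->tensor_type().elem_type() ==
                       ONNX_NAMESPACE::TensorProto_DataType_FLOAT16;
  // The fused FP16 kernel only exists on the DML EP; the CPU EP has no
  // FP16 "Mul", so reject the FP16 pattern everywhere else.
  if (is_fp16 &&
      node.GetExecutionProviderType() != onnxruntime::kDmlExecutionProvider) {
    return false;
  }
  return true;
}
```

Correspondingly, the FP16 unit tests can be compiled only into DML builds
(e.g. guarded by `#ifdef USE_DML`), which matches the CPU-only runs above
exercising just the FP32 paths.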
---
 .../fusion/matmul_integer_to_float16.onnx     | 90 -------------------
 1 file changed, 90 deletions(-)
 delete mode 100644 onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float16.onnx

diff --git a/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float16.onnx b/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float16.onnx
deleted file mode 100644
index 67d50eac6f74a..0000000000000
--- a/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float16.onnx
+++ /dev/null
@@ -1,90 +0,0 @@
[90 deleted lines of serialized ONNX protobuf omitted: the MatMulIntegerToFloat_fusion test graph (a DynamicQuantizeLinear feeding three MatMulInteger/Cast/Mul branches with per-branch b_scale/b_zp initializers and Add bias nodes), which renders as unreadable binary when diffed as text]