Skip to content

Commit

Permalink
Add support for 16bit QDQ Clip. Add more 16-bit QDQ tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
adrianlizarraga committed Sep 20, 2023
1 parent 1b66cfa commit 7e8f7ce
Show file tree
Hide file tree
Showing 8 changed files with 375 additions and 172 deletions.
25 changes: 20 additions & 5 deletions onnxruntime/core/optimizer/qdq_transformer/clip_quantizelinear.cc
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include "core/optimizer/initializer.h"
#include "core/optimizer/qdq_transformer/clip_quantizelinear.h"

#include <limits>

#include "core/optimizer/initializer.h"
#include "core/optimizer/qdq_transformer/qdq_util.h"
#include "core/optimizer/utils.h"
#include "core/graph/graph_utils.h"
Expand Down Expand Up @@ -50,14 +53,26 @@ static bool GetQConstantLowerUpper(const Graph& graph, const Node& node, float&
switch (zp_initializer.data_type()) {
case ONNX_NAMESPACE::TensorProto_DataType_INT8: {
const int8_t zero_point = zp_initializer.data<int8_t>()[0];
lower = scale * (-128 - zero_point);
upper = scale * (127 - zero_point);
lower = scale * (std::numeric_limits<int8_t>::lowest() - zero_point);
upper = scale * (std::numeric_limits<int8_t>::max() - zero_point);
break;
}
case ONNX_NAMESPACE::TensorProto_DataType_UINT8: {
const uint8_t zero_point = zp_initializer.data<uint8_t>()[0];
lower = scale * (0 - zero_point);
upper = scale * (255 - zero_point);
lower = scale * (std::numeric_limits<uint8_t>::lowest() - zero_point);
upper = scale * (std::numeric_limits<uint8_t>::max() - zero_point);
break;
}
case ONNX_NAMESPACE::TensorProto_DataType_INT16: {
const int16_t zero_point = zp_initializer.data<int16_t>()[0];
lower = scale * (std::numeric_limits<int16_t>::lowest() - zero_point);
upper = scale * (std::numeric_limits<int16_t>::max() - zero_point);
break;
}
case ONNX_NAMESPACE::TensorProto_DataType_UINT16: {
const uint16_t zero_point = zp_initializer.data<uint16_t>()[0];
lower = scale * (std::numeric_limits<uint16_t>::lowest() - zero_point);
upper = scale * (std::numeric_limits<uint16_t>::max() - zero_point);
break;
}
default:
Expand Down
50 changes: 28 additions & 22 deletions onnxruntime/test/optimizer/qdq_transformer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2501,28 +2501,34 @@ TEST(QDQTransformerTests, Clip) {
for (auto opset : opsets) {
test_case(.0235294122248888f, static_cast<int8_t>(-128), 0, opset); // [0, 6]
test_case(.0235294122248888f, static_cast<int8_t>(-128), 0, opset, true); // [0, 6] contrib qdq
test_case(.02f, static_cast<int8_t>(-128), 0, opset); // [0, 5.1]
test_case(.02f, static_cast<int8_t>(-128), 0, opset, true); // [0, 5.1] contrib qdq
test_case(.03f, static_cast<int8_t>(-128), 1, opset); // [0, 7.65]
test_case(.03f, static_cast<int8_t>(-128), 1, opset, true); // [0, 7.65] contrib qdq
test_case(.02f, static_cast<int8_t>(127), 1, opset); // [-5.1 , 0]
test_case(.02f, static_cast<int8_t>(127), 1, opset, true); // [-5.1 , 0] contrib qdq
test_case(.02f, static_cast<int8_t>(0), 1, opset); // [-2.56, 2.54]
test_case(.02f, static_cast<int8_t>(0), 1, opset, true); // [-2.56, 2.54] contrib qdq
test_case(.04f, static_cast<int8_t>(-97), 1, opset); // [-1.24, 8.96]
test_case(.04f, static_cast<int8_t>(-97), 1, opset, true); // [-1.24, 8.96] contrib qdq
test_case(.02352941176f, static_cast<uint8_t>(0), 0, opset); // [0, 6]
test_case(.02352941176f, static_cast<uint8_t>(0), 0, opset, true); // [0, 6] contrib qdq
test_case(.02f, static_cast<uint8_t>(0), 0, opset); // [0, 5.1]
test_case(.02f, static_cast<uint8_t>(0), 0, opset, true); // [0, 5.1] contrib qdq
test_case(.03f, static_cast<uint8_t>(0), 1, opset); // [0, 7.65]
test_case(.03f, static_cast<uint8_t>(0), 1, opset, true); // [0, 7.65] contrib qdq
test_case(.02f, static_cast<uint8_t>(255), 1, opset); // [-5.1, 0]
test_case(.02f, static_cast<uint8_t>(255), 1, opset, true); // [-5.1, 0] contrib qdq
test_case(.02f, static_cast<uint8_t>(128), 1, opset); // [-2.56, 2.54]
test_case(.02f, static_cast<uint8_t>(128), 1, opset, true); // [-2.56, 2.54] contrib qdq
test_case(.04f, static_cast<uint8_t>(31), 1, opset); // [-1.24, 8.96]
test_case(.04f, static_cast<uint8_t>(31), 1, opset, true); // [-1.24, 8.96] contrib qdq
test_case(9.15541313801785e-5f, static_cast<int16_t>(std::numeric_limits<int16_t>::min()), 0,
opset, true); // [0, 6] contrib 16-bit qdq
test_case(0.0009f, static_cast<int16_t>(std::numeric_limits<int16_t>::min()), 1,
opset, true); // [0, 58.98] contrib 16-bit qdq
test_case(.02f, static_cast<int8_t>(-128), 0, opset); // [0, 5.1]
test_case(.02f, static_cast<int8_t>(-128), 0, opset, true); // [0, 5.1] contrib qdq
test_case(.03f, static_cast<int8_t>(-128), 1, opset); // [0, 7.65]
test_case(.03f, static_cast<int8_t>(-128), 1, opset, true); // [0, 7.65] contrib qdq
test_case(.02f, static_cast<int8_t>(127), 1, opset); // [-5.1 , 0]
test_case(.02f, static_cast<int8_t>(127), 1, opset, true); // [-5.1 , 0] contrib qdq
test_case(.02f, static_cast<int8_t>(0), 1, opset); // [-2.56, 2.54]
test_case(.02f, static_cast<int8_t>(0), 1, opset, true); // [-2.56, 2.54] contrib qdq
test_case(.04f, static_cast<int8_t>(-97), 1, opset); // [-1.24, 8.96]
test_case(.04f, static_cast<int8_t>(-97), 1, opset, true); // [-1.24, 8.96] contrib qdq
test_case(.02352941176f, static_cast<uint8_t>(0), 0, opset); // [0, 6]
test_case(.02352941176f, static_cast<uint8_t>(0), 0, opset, true); // [0, 6] contrib qdq
test_case(9.15541313801785e-5f, static_cast<uint16_t>(0), 0, opset, true); // [0, 6] contrib 16-bit qdq
test_case(0.0009f, static_cast<uint16_t>(0), 1, opset, true); // [0, 58.98] contrib 16-bit qdq
test_case(.02f, static_cast<uint8_t>(0), 0, opset); // [0, 5.1]
test_case(.02f, static_cast<uint8_t>(0), 0, opset, true); // [0, 5.1] contrib qdq
test_case(.03f, static_cast<uint8_t>(0), 1, opset); // [0, 7.65]
test_case(.03f, static_cast<uint8_t>(0), 1, opset, true); // [0, 7.65] contrib qdq
test_case(.02f, static_cast<uint8_t>(255), 1, opset); // [-5.1, 0]
test_case(.02f, static_cast<uint8_t>(255), 1, opset, true); // [-5.1, 0] contrib qdq
test_case(.02f, static_cast<uint8_t>(128), 1, opset); // [-2.56, 2.54]
test_case(.02f, static_cast<uint8_t>(128), 1, opset, true); // [-2.56, 2.54] contrib qdq
test_case(.04f, static_cast<uint8_t>(31), 1, opset); // [-1.24, 8.96]
test_case(.04f, static_cast<uint8_t>(31), 1, opset, true); // [-1.24, 8.96] contrib qdq
}

// opset_version = 10
Expand Down
34 changes: 30 additions & 4 deletions onnxruntime/test/providers/qnn/clip_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ template <typename QType>
static void RunQDQClipTestOnHTP(const TestInputDef<float>& input_def,
const std::vector<TestInputDef<float>>& min_max_defs,
ExpectedEPNodeAssignment expected_ep_assignment,
int opset = 13) {
int opset = 13,
bool use_contrib_qdq = false) {
ProviderOptions provider_options;

#if defined(_WIN32)
Expand All @@ -88,7 +89,8 @@ static void RunQDQClipTestOnHTP(const TestInputDef<float>& input_def,
#endif

auto f32_model_builder = BuildOpTestCase<float, float>("Clip", {input_def}, {min_max_defs}, {});
auto qdq_model_builder = BuildQDQOpTestCase<QType, float>("Clip", {input_def}, {min_max_defs}, {});
auto qdq_model_builder = BuildQDQOpTestCase<QType, float>("Clip", {input_def}, {min_max_defs}, {},
kOnnxDomain, use_contrib_qdq);

TestQDQModelAccuracy(f32_model_builder,
qdq_model_builder,
Expand All @@ -97,7 +99,7 @@ static void RunQDQClipTestOnHTP(const TestInputDef<float>& input_def,
expected_ep_assignment);
}

// Test QDQ Clip with default min/max.
// Test 8-bit QDQ Clip with default min/max.
// NOTE: The Clip operator is *optimized* away during L1 optimizations, so QNN EP does not get a graph with a Clip op.
// Instead, QNN EP will get a graph with a Q -> DQ.
// - Original sequence: Q1 -> DQ1 -> Clip -> Q2 -> DQ2
Expand All @@ -109,14 +111,38 @@ TEST_F(QnnHTPBackendTests, Clip_U8_DefaultMinMax_Rank4) {
ExpectedEPNodeAssignment::All);
}

// Test QDQ Clip with non-default min and max inputs. QNN EP will get a graph with a Clip operator.
// Test 16-bit QDQ Clip with default min/max.
// NOTE: The Clip operator is *optimized* away during L1 optimizations, so QNN EP does not get a graph with a Clip op.
// Instead, QNN EP will get a graph with a Q -> DQ.
// - Original sequence: Q1 -> DQ1 -> Clip -> Q2 -> DQ2
// - ClipQuantFusion: Fuses Clip -> QuantizeLinear resulting in Q1 -> DQ1 -> Q2' -> DQ2
// - DoubleQDQPairsRemover: Simplifies remaining Q1 -> DQ1 -> Q2' -> DQ2 sequence to Q1 -> DQ2.
TEST_F(QnnHTPBackendTests, Clip_U16_DefaultMinMax_Rank4) {
  const auto input_data = GetFloatDataInRange(-10.0f, 10.0f, 48);
  constexpr int opset_version = 13;
  constexpr bool use_contrib_qdq = true;  // Use com.microsoft Q/DQ ops
  RunQDQClipTestOnHTP<uint16_t>(TestInputDef<float>({1, 3, 4, 4}, false, input_data),
                                {},  // No explicit min/max inputs.
                                ExpectedEPNodeAssignment::All,
                                opset_version,
                                use_contrib_qdq);
}

// Test 8-bit QDQ Clip with explicit (non-default) min and max inputs.
// QNN EP will get a graph containing an actual Clip operator.
TEST_F(QnnHTPBackendTests, Clip_U8_Rank4) {
  const TestInputDef<float> min_def({}, true, {-5.0f});
  const TestInputDef<float> max_def({}, true, {5.0f});
  RunQDQClipTestOnHTP<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, GetFloatDataInRange(-10.0f, 10.0f, 48)),
                               {min_def, max_def},
                               ExpectedEPNodeAssignment::All);
}

// Test 16-bit QDQ Clip with explicit (non-default) min and max inputs.
// QNN EP will get a graph containing an actual Clip operator.
TEST_F(QnnHTPBackendTests, Clip_U16_Rank4) {
  const TestInputDef<float> min_def({}, true, {-5.0f});
  const TestInputDef<float> max_def({}, true, {5.0f});
  constexpr int opset_version = 13;
  constexpr bool use_contrib_qdq = true;  // Use com.microsoft Q/DQ ops
  RunQDQClipTestOnHTP<uint16_t>(TestInputDef<float>({1, 3, 4, 4}, false, GetFloatDataInRange(-10.0f, 10.0f, 48)),
                                {min_def, max_def},
                                ExpectedEPNodeAssignment::All,
                                opset_version,
                                use_contrib_qdq);
}

// Test QDQ Clip of rank 5.
TEST_F(QnnHTPBackendTests, Clip_U8_Rank5) {
// We can't use the usual model-building functions because they add standalone Quantize and Dequantize nodes
Expand Down
94 changes: 58 additions & 36 deletions onnxruntime/test/providers/qnn/flatten_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,36 @@ static void RunFlattenTestOnCPU(const TestInputDef<DataType>& input_def,
expected_ep_assignment);
}

//
// CPU tests:
//

// Test that Flatten input (rank4) with axis == 0 runs on the QNN CPU backend
// with all nodes assigned to the EP.
TEST_F(QnnCPUBackendTests, Flatten_Rank4_Axis0) {
  const TestInputDef<float> input_def({1, 3, 4, 4}, false, -10.0f, 10.0f);
  const auto axis_attr = utils::MakeAttribute("axis", static_cast<int64_t>(0));
  RunFlattenTestOnCPU(input_def, {axis_attr}, ExpectedEPNodeAssignment::All);
}

// Test that Flatten input (rank4) with axis == -1 runs on the QNN CPU backend
// with all nodes assigned to the EP.
TEST_F(QnnCPUBackendTests, Flatten_Rank4_AxisNeg1) {
  const TestInputDef<float> input_def({1, 3, 4, 4}, false, -10.0f, 10.0f);
  const auto axis_attr = utils::MakeAttribute("axis", static_cast<int64_t>(-1));
  RunFlattenTestOnCPU(input_def, {axis_attr}, ExpectedEPNodeAssignment::All);
}

// Test that Flatten input (rank5) with axis == 2 runs on the QNN CPU backend
// with all nodes assigned to the EP.
TEST_F(QnnCPUBackendTests, Flatten_Rank5_Axis2) {
  const TestInputDef<float> input_def({1, 2, 3, 4, 4}, false, -10.0f, 10.0f);
  const auto axis_attr = utils::MakeAttribute("axis", static_cast<int64_t>(2));
  RunFlattenTestOnCPU(input_def, {axis_attr}, ExpectedEPNodeAssignment::All);
}

#if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
//
// HTP tests:
//

// Runs a model with a non-QDQ Flatten operator on the QNN HTP backend. Checks the graph node assignment
// and that inference outputs for QNN EP and CPU EP match.
template <typename DataType>
Expand Down Expand Up @@ -62,7 +92,8 @@ template <typename QType>
static void RunQDQFlattenTestOnHTP(const TestInputDef<float>& input_def,
const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
ExpectedEPNodeAssignment expected_ep_assignment,
int opset = 13) {
int opset = 13,
bool use_contrib_qdq = false) {
ProviderOptions provider_options;

#if defined(_WIN32)
Expand All @@ -71,57 +102,48 @@ static void RunQDQFlattenTestOnHTP(const TestInputDef<float>& input_def,
provider_options["backend_path"] = "libQnnHtp.so";
#endif

TestQDQModelAccuracy(BuildOpTestCase<float>("Flatten", {input_def}, {}, attrs), // baseline float32 model
BuildQDQOpTestCase<QType>("Flatten", {input_def}, {}, attrs), // QDQ model
auto f32_model_builder = BuildOpTestCase<float>("Flatten", {input_def}, {}, attrs);
auto qdq_model_builder = BuildQDQOpTestCase<QType>("Flatten", {input_def}, {}, attrs, kOnnxDomain, use_contrib_qdq);
TestQDQModelAccuracy(f32_model_builder,
qdq_model_builder,
provider_options,
opset,
expected_ep_assignment);
}

//
// CPU tests:
//

// Test that Flatten input (rank4) with axis == 0.
// NOTE(review): this capture shows this test twice — the commit moved the CPU
// tests above the HTP section; only one copy exists in the actual file.
TEST_F(QnnCPUBackendTests, Flatten_Rank4_Axis0) {
  // Expect every node in the graph to be assigned to the QNN CPU EP.
  RunFlattenTestOnCPU(TestInputDef<float>({1, 3, 4, 4}, false, -10.0f, 10.0f),
                      {utils::MakeAttribute("axis", static_cast<int64_t>(0))},
                      ExpectedEPNodeAssignment::All);
}

// Test that Flatten input (rank4) with axis == -1.
// NOTE(review): this capture shows this test twice — the commit moved the CPU
// tests above the HTP section; only one copy exists in the actual file.
TEST_F(QnnCPUBackendTests, Flatten_Rank4_AxisNeg1) {
  // Expect every node in the graph to be assigned to the QNN CPU EP.
  RunFlattenTestOnCPU(TestInputDef<float>({1, 3, 4, 4}, false, -10.0f, 10.0f),
                      {utils::MakeAttribute("axis", static_cast<int64_t>(-1))},
                      ExpectedEPNodeAssignment::All);
}

// Test that Flatten input (rank5) with axis == 2.
// NOTE(review): this capture shows this test twice — the commit moved the CPU
// tests above the HTP section; only one copy exists in the actual file.
TEST_F(QnnCPUBackendTests, Flatten_Rank5_Axis2) {
  // Expect every node in the graph to be assigned to the QNN CPU EP.
  RunFlattenTestOnCPU(TestInputDef<float>({1, 2, 3, 4, 4}, false, -10.0f, 10.0f),
                      {utils::MakeAttribute("axis", static_cast<int64_t>(2))},
                      ExpectedEPNodeAssignment::All);
}

#if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
//
// HTP tests:
//
// Test that Flatten input (rank4) with axis == 0.
// Test 8-bit QDQ Flatten input (rank4) with axis == 0 on the QNN HTP backend.
TEST_F(QnnHTPBackendTests, Flatten_Rank4_Axis0) {
  const TestInputDef<float> input_def({1, 3, 4, 4}, false, -10.0f, 10.0f);
  RunQDQFlattenTestOnHTP<uint8_t>(input_def,
                                  {utils::MakeAttribute("axis", static_cast<int64_t>(0))},
                                  ExpectedEPNodeAssignment::All);
}

// Test that Flatten input (rank4) with axis == -1.
// Test 16-bit QDQ Flatten input (rank4) with axis == 0 on the QNN HTP backend.
TEST_F(QnnHTPBackendTests, Flatten_Rank4_Axis0_U16) {
  const TestInputDef<float> input_def({1, 3, 4, 4}, false, -10.0f, 10.0f);
  constexpr int opset_version = 13;
  constexpr bool use_contrib_qdq = true;  // Use com.microsoft Q/DQ ops
  RunQDQFlattenTestOnHTP<uint16_t>(input_def,
                                   {utils::MakeAttribute("axis", static_cast<int64_t>(0))},
                                   ExpectedEPNodeAssignment::All,
                                   opset_version,
                                   use_contrib_qdq);
}

// Test 8-bit QDQ Flatten input (rank4) with axis == -1 on the QNN HTP backend.
TEST_F(QnnHTPBackendTests, Flatten_Rank4_AxisNeg1) {
  const TestInputDef<float> input_def({1, 3, 4, 4}, false, -10.0f, 10.0f);
  RunQDQFlattenTestOnHTP<uint8_t>(input_def,
                                  {utils::MakeAttribute("axis", static_cast<int64_t>(-1))},
                                  ExpectedEPNodeAssignment::All);
}

// Test QDQ Flatten with an input of rank5.
// Test 16-bit QDQ Flatten input (rank4) with axis == -1 on the QNN HTP backend.
TEST_F(QnnHTPBackendTests, Flatten_Rank4_AxisNeg1_U16) {
  const TestInputDef<float> input_def({1, 3, 4, 4}, false, -10.0f, 10.0f);
  constexpr int opset_version = 13;
  constexpr bool use_contrib_qdq = true;  // Use com.microsoft Q/DQ ops
  RunQDQFlattenTestOnHTP<uint16_t>(input_def,
                                   {utils::MakeAttribute("axis", static_cast<int64_t>(-1))},
                                   ExpectedEPNodeAssignment::All,
                                   opset_version,
                                   use_contrib_qdq);
}

// Test 8-bit QDQ Flatten with an input of rank5.
TEST_F(QnnHTPBackendTests, Flatten_QDQ8bit_Rank5) {
// We can't use the usual model-building functions because they add standalone Quantize and Dequantize nodes
// at the input and output. These Q/DQ ops get lowered to QNN's Quantize and Dequantize operators, which DO NOT
Expand Down Expand Up @@ -157,7 +179,7 @@ TEST_F(QnnHTPBackendTests, Flatten_QDQ8bit_Rank5) {
ExpectedEPNodeAssignment::All);
}

// Test that rank4 int32 Flatten runs on HTP backend.
// Test that int32 non-QDQ Flatten runs on HTP backend.
TEST_F(QnnHTPBackendTests, Flatten_Int32_Rank4_Axis2) {
std::vector<int32_t> input_data = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
RunFlattenTestOnHTP<int32_t>(TestInputDef<int32_t>({1, 3, 2, 2}, false, input_data),
Expand Down
Loading

0 comments on commit 7e8f7ce

Please sign in to comment.