Enable QDQ Clip on QNN HTP backend. Add unit tests.
adrianlizarraga committed Sep 19, 2023
1 parent d9d79cd commit 2091545
Showing 4 changed files with 104 additions and 106 deletions.
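For context, the sketch below is not part of this commit; it shows one way an application could run a QDQ model containing a Clip node group on the QNN HTP backend once this support lands. It assumes the ONNX Runtime C++ API's generic AppendExecutionProvider registration for the "QNN" provider; the model filename is a placeholder, and the backend_path values mirror the ones used in the unit tests below.

#include <onnxruntime_cxx_api.h>

int main() {
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "qnn_clip_example");
  Ort::SessionOptions session_options;

  // Register the QNN execution provider and point it at the HTP backend library.
#if defined(_WIN32)
  session_options.AppendExecutionProvider("QNN", {{"backend_path", "QnnHtp.dll"}});
#else
  session_options.AppendExecutionProvider("QNN", {{"backend_path", "libQnnHtp.so"}});
#endif

  // Placeholder model path: a QDQ model whose DQ -> Clip -> Q node group can now be
  // assigned to QNN EP.
  Ort::Session session(env, ORT_TSTR("qdq_model_with_clip.onnx"), session_options);
  return 0;
}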
@@ -78,7 +78,8 @@ static const OpVersionsAndSelector::OpVersionsMap GetUnaryOpVersionsMap() {
{"Abs", {}},
{"Neg", {}},
{"DepthToSpace", {}},
{"SpaceToDepth", {}}};
{"SpaceToDepth", {}},
{"Clip", {}}};
}
static const OpVersionsAndSelector::OpVersionsMap GetBinaryOpVersionsMap() {
return {{"Add", {}},
122 changes: 53 additions & 69 deletions onnxruntime/core/providers/qnn/builder/opbuilder/clip_op_builder.cc
@@ -33,8 +33,6 @@ class ClipOpBuilder : public BaseOpBuilder {

private:
Status ExplictOpCheck(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const;
mutable float min_value_ = std::numeric_limits<float>::lowest();
mutable float max_value_ = std::numeric_limits<float>::max();
};

Status ClipOpBuilder::ExplictOpCheck(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const {
@@ -61,82 +59,68 @@ Status ClipOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
if (do_op_validation) {
ORT_RETURN_IF_ERROR(ExplictOpCheck(qnn_model_wrapper, node_unit));
}
Qnn_DataType_t qnn_data_type = QNN_DATATYPE_FLOAT_32;

auto inputs = node_unit.Inputs();
for (size_t input_i = 0; input_i < inputs.size(); ++input_i) {
Qnn_QuantizeParams_t quantize_param = QNN_QUANTIZE_PARAMS_INIT;
bool is_quantized_tensor = inputs[input_i].quant_param.has_value();
utils::InitializeQuantizeParam(quantize_param, is_quantized_tensor);

auto& input_name = inputs[input_i].node_arg.Name();
if (input_name.empty()) {
// Ignore unspecified/unused optional input
continue;
}
if (qnn_model_wrapper.IsQnnTensorWrapperExist(input_name)) {
LOGS(logger, VERBOSE) << "Tensor already added or the input is not named, skip it: " << input_name;
input_names.push_back(input_name);
continue;
}

const auto* type_proto = inputs[input_i].node_arg.TypeAsProto();
ORT_RETURN_IF_ERROR(utils::GetQnnDataType(is_quantized_tensor, type_proto, qnn_data_type));

std::vector<uint32_t> input_shape;
ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(inputs[input_i].node_arg, input_shape), "Cannot get shape");

ORT_RETURN_IF_NOT(qnn_model_wrapper.ProcessQuantizationParameter(inputs[input_i].quant_param,
quantize_param.scaleOffsetEncoding.scale,
quantize_param.scaleOffsetEncoding.offset),
"Cannot get quantization parameter");

float* ini_data = nullptr;
std::vector<uint8_t> unpacked_tensor;
bool is_initializer_input = qnn_model_wrapper.IsInitializerInput(input_name);
if (is_initializer_input) {
const auto& input_tensor = qnn_model_wrapper.GetInitializerTensors().at(input_name);
ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input_tensor, unpacked_tensor));
ini_data = reinterpret_cast<float*>(unpacked_tensor.data());
if (input_i == 1) {
min_value_ = *ini_data;
continue;
} else if (input_i == 2) {
max_value_ = *ini_data;
continue;
}
}
ORT_ENFORCE(input_i == 0, "QNN ReluMinMax operator expects only one input. Min and max are expected to be parameters, ie. initializer inputs in ONNX model");

Qnn_TensorType_t tensor_type = GetInputTensorType(qnn_model_wrapper, input_name);
QnnTensorWrapper input_tensorwrapper(input_name, tensor_type, qnn_data_type, quantize_param,
std::move(input_shape), std::move(unpacked_tensor));
ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(input_tensorwrapper)), "Failed to add tensor.");
input_names.push_back(input_name);
}

return Status::OK();
return ProcessInput(qnn_model_wrapper, node_unit.Inputs()[0], logger, input_names);
}

Status ClipOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
std::vector<std::string>&& input_names,
const logging::Logger& logger,
bool do_op_validation) const {
const auto& inputs = node_unit.Inputs();
const size_t num_inputs = inputs.size();

const Qnn_DataType_t qnn_data_type = QNN_DATATYPE_FLOAT_32;
std::vector<std::string> param_tensor_names;
Qnn_Scalar_t min_qnn_scalar = QNN_SCALAR_INIT;
min_qnn_scalar.dataType = QNN_DATATYPE_FLOAT_32;
min_qnn_scalar.floatValue = min_value_;
QnnParamWrapper min_value_param(node_unit.Index(), node_unit.Name(), QNN_OP_RELU_MIN_MAX_PARAM_MIN_VALUE, min_qnn_scalar);
param_tensor_names.push_back(min_value_param.GetParamTensorName());
qnn_model_wrapper.AddParamWrapper(std::move(min_value_param));

Qnn_Scalar_t max_qnn_scalar = QNN_SCALAR_INIT;
max_qnn_scalar.dataType = QNN_DATATYPE_FLOAT_32;
max_qnn_scalar.floatValue = max_value_;
QnnParamWrapper max_value_param(node_unit.Index(), node_unit.Name(), QNN_OP_RELU_MIN_MAX_PARAM_MAX_VALUE, max_qnn_scalar);
param_tensor_names.push_back(max_value_param.GetParamTensorName());
qnn_model_wrapper.AddParamWrapper(std::move(max_value_param));

auto get_f32_from_bytes = [](const std::vector<uint8_t>& bytes, float default_val) -> float {
return bytes.empty() ? default_val : *reinterpret_cast<const float*>(bytes.data());
};

// Set the 'min' parameter.
{
std::vector<uint8_t> min_val_bytes;

if (num_inputs > 1 && !inputs[1].node_arg.Name().empty()) {
OnnxInputInfo min_input_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[1], min_input_info));
ORT_RETURN_IF_NOT(min_input_info.qnn_data_type == qnn_data_type,
"QNN EP: The 'min' input of the Clip operator must be of type float32.");
ORT_RETURN_IF_NOT(min_input_info.is_initializer, "QNN EP: The Clip operator's 'min' input must be an initializer.");
ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*min_input_info.initializer_tensor, min_val_bytes));
}

Qnn_Scalar_t min_qnn_scalar = QNN_SCALAR_INIT;
min_qnn_scalar.dataType = qnn_data_type;
min_qnn_scalar.floatValue = get_f32_from_bytes(min_val_bytes, std::numeric_limits<float>::lowest());
QnnParamWrapper min_value_param(node_unit.Index(), node_unit.Name(), QNN_OP_RELU_MIN_MAX_PARAM_MIN_VALUE,
min_qnn_scalar);
param_tensor_names.push_back(min_value_param.GetParamTensorName());
qnn_model_wrapper.AddParamWrapper(std::move(min_value_param));
}

// Set the 'max' parameter.
{
std::vector<uint8_t> max_val_bytes;

if (num_inputs > 2 && !inputs[2].node_arg.Name().empty()) {
OnnxInputInfo max_input_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[2], max_input_info));
ORT_RETURN_IF_NOT(max_input_info.qnn_data_type == qnn_data_type,
"QNN EP: The 'max' input of the Clip operator must of type float32.");
ORT_RETURN_IF_NOT(max_input_info.is_initializer, "QNN EP: The Clip operator's 'max' input must be an initializer.");
ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*max_input_info.initializer_tensor, max_val_bytes));
}

Qnn_Scalar_t max_qnn_scalar = QNN_SCALAR_INIT;
max_qnn_scalar.dataType = qnn_data_type;
max_qnn_scalar.floatValue = get_f32_from_bytes(max_val_bytes, std::numeric_limits<float>::max());
QnnParamWrapper max_value_param(node_unit.Index(), node_unit.Name(), QNN_OP_RELU_MIN_MAX_PARAM_MAX_VALUE,
max_qnn_scalar);
param_tensor_names.push_back(max_value_param.GetParamTensorName());
qnn_model_wrapper.AddParamWrapper(std::move(max_value_param));
}

ORT_RETURN_IF_ERROR(ProcessOutputs(qnn_model_wrapper, node_unit,
std::move(input_names),
81 changes: 47 additions & 34 deletions onnxruntime/test/providers/qnn/clip_op_test.cc
@@ -120,54 +120,67 @@ static void RunQDQClipTestOnHTP(const std::vector<TestInputDef<float>>& input_defs,
BuildQDQClipTestCase<QType>(input_defs), // QDQ model
provider_options,
opset,
expected_ep_assignment,
1e-4f, logging::Severity::kVERBOSE);
expected_ep_assignment);
}

// Runs a model with a non-QDQ Clip operator on the QNN HTP backend. Checks the graph node assignment
// and that inference outputs for QNN EP and CPU EP match.
template <typename DataType>
static void RunClipTestOnHTP(const std::vector<TestInputDef<DataType>>& input_defs,
ExpectedEPNodeAssignment expected_ep_assignment,
int opset = 13) {
ProviderOptions provider_options;

#if defined(_WIN32)
provider_options["backend_path"] = "QnnHtp.dll";
#else
provider_options["backend_path"] = "libQnnHtp.so";
#endif

RunQnnModelTest(BuildOpTestCase("Clip", input_defs, {}),
provider_options,
opset,
expected_ep_assignment);
}

// Test QDQ Clip with default min/max. (Fused with QuantizeLinear by optimizer).
TEST_F(QnnHTPBackendTests, Clip_4D_DefaultMinMax) {
// Test QDQ Clip with default min/max.
// NOTE: The Clip operator is *optimized* away during L1 optimizations, so QNN EP does not get a graph with a Clip op.
// Instead, QNN EP will get a graph with a Q -> DQ.
// - Original sequence: Q1 -> DQ1 -> Clip -> Q2 -> DQ2
// - ClipQuantFusion: Fuses Clip -> QuantizeLinear resulting in Q1 -> DQ1 -> Q2' -> DQ2
// - DoubleQDQPairsRemover: Simplifies remaining Q1 -> DQ1 -> Q2' -> DQ2 sequence to Q1 -> DQ2.
TEST_F(QnnHTPBackendTests, Clip_U8_DefaultMinMax_Rank4) {
RunQDQClipTestOnHTP<uint8_t>({TestInputDef<float>({1, 3, 4, 4}, false, GetFloatDataInRange(-10.0f, 10.0f, 48))},
ExpectedEPNodeAssignment::All);
}

// Test QDQ Clip with non-default min and max inputs.
TEST_F(QnnHTPBackendTests, Clip_4D) {
// Test QDQ Clip with non-default min and max inputs. QNN EP will get a graph with a Clip operator.
TEST_F(QnnHTPBackendTests, Clip_U8_Rank4) {
RunQDQClipTestOnHTP<uint8_t>({TestInputDef<float>({1, 3, 4, 4}, false, GetFloatDataInRange(-10.0f, 10.0f, 48)),
TestInputDef<float>({}, true, {-5.0f}),
TestInputDef<float>({}, true, {5.0f})},
ExpectedEPNodeAssignment::All);
}
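
For reference, here is a rough sketch of the kind of QDQ graph these tests exercise: standalone Q/DQ at the model input and output wrapping the DQ -> Clip -> Q group. This is illustrative only, not the actual BuildQDQClipTestCase implementation, and the scale/zero-point values are placeholders rather than the ones the helper computes.

// Illustrative sketch only (placeholder scale/zero-point values); the real graph is
// built by BuildQDQClipTestCase<QType>.
GetTestModelFn qdq_clip_sketch = [](ModelTestBuilder& builder) {
  // input (f32) -> Q -> DQ ->
  NodeArg* input = builder.MakeInput<float>({1, 3, 4, 4}, GetFloatDataInRange(-10.0f, 10.0f, 48));
  NodeArg* input_q = builder.MakeIntermediate();
  NodeArg* input_dq = builder.MakeIntermediate();
  builder.AddQuantizeLinearNode<uint8_t>(input, 0.08f, 128, input_q);
  builder.AddDequantizeLinearNode<uint8_t>(input_q, 0.08f, 128, input_dq);

  // Clip(min, max) ->
  NodeArg* min_input = builder.MakeScalarInitializer(-5.0f);
  NodeArg* max_input = builder.MakeScalarInitializer(5.0f);
  NodeArg* clip_output = builder.MakeIntermediate();
  builder.AddNode("Clip", {input_dq, min_input, max_input}, {clip_output});

  // Q -> DQ -> output (f32)
  NodeArg* output_q = builder.MakeIntermediate();
  NodeArg* output = builder.MakeOutput();
  builder.AddQuantizeLinearNode<uint8_t>(clip_output, 0.04f, 128, output_q);
  builder.AddDequantizeLinearNode<uint8_t>(output_q, 0.04f, 128, output);
};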

#if 0
// Test non-QDQ Clip with 4D input on HTP
TEST_F(QnnHTPBackendTests, Clip_NotQDQ_4D_f32) {
RunClipTestOnHTP<float>({TestInputDef<float>({1, 3, 4, 4}, false, GetFloatDataInRange(-10.0f, 10.0f, 48)),
TestInputDef<float>({}, true, {-5.0f}),
TestInputDef<float>({}, true, {5.0f})},
ExpectedEPNodeAssignment::All);
}
// Test QDQ Clip of rank 5.
TEST_F(QnnHTPBackendTests, Clip_U8_Rank5) {
// We can't use the usual model-building functions because they add standalone Quantize and Dequantize nodes
// at the input and output. These Q/DQ ops get lowered to QNN's Quantize and Dequantize operators, which DO NOT
// support rank 5 tensors. Therefore, we have to create a test model that only instantiates the DQ -> Clip -> Q
// QDQ node group, which gets lowered to a single QNN Clip node.
GetTestModelFn model_fn = [](ModelTestBuilder& builder) {
// input (u8) -> DQ ->
NodeArg* quant_input = builder.MakeInput<uint8_t>({1, 1, 2, 2, 2}, {0, 1, 6, 10, 20, 100, 128, 255});
NodeArg* input_dq = builder.MakeIntermediate();
builder.AddDequantizeLinearNode<uint8_t>(quant_input, 1.0f, 0, input_dq); // scale = 1.0, zp = 0

// Min/Max initializers
NodeArg* min_input = builder.MakeScalarInitializer(5.0f);
NodeArg* max_input = builder.MakeScalarInitializer(100.0f);

// Clip ->
NodeArg* clip_output = builder.MakeIntermediate();
builder.AddNode("Clip", {input_dq, min_input, max_input}, {clip_output});

// Q -> output (u8)
NodeArg* output = builder.MakeOutput();
builder.AddQuantizeLinearNode<uint8_t>(clip_output, 1.0f, 0, output); // scale = 1.0, zp = 0
};

ProviderOptions provider_options;

#if defined(_WIN32)
provider_options["backend_path"] = "QnnHtp.dll";
#else
provider_options["backend_path"] = "libQnnHtp.so";
#endif

RunQnnModelTest(model_fn,
provider_options,
13, // opset
ExpectedEPNodeAssignment::All);
}

#endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
} // namespace test
} // namespace onnxruntime
4 changes: 2 additions & 2 deletions onnxruntime/test/providers/qnn/squeeze_unsqueeze_op_test.cc
@@ -232,15 +232,15 @@ TEST_F(QnnHTPBackendTests, Squeeze_Rank5_Rank2_f32) {
ExpectedEPNodeAssignment::All);
}

// Test Squeeze of rank 4 -> rank 3 with a negative axes value.
// Test QDQ Squeeze of rank 4 -> rank 3 with a negative axes value.
TEST_F(QnnHTPBackendTests, Squeeze_Rank4_Rank3_NegAxes_f32) {
RunQDQSqueezeTestOnHTP<uint8_t>("Squeeze",
TestInputDef<float>({1, 3, 2, 1}, false, -10.0f, 10.0f),
TestInputDef<int64_t>({1}, true, {-1}), // Squeeze last axis => (1, 3, 2)
ExpectedEPNodeAssignment::All);
}

// Test Unsqueeze of rank 3 -> rank 5.
// Test QDQ Unsqueeze of rank 3 -> rank 5.
TEST_F(QnnHTPBackendTests, Unsqueeze_Rank3_Rank5_f32) {
// We can't use the usual model-building functions because they add standalone Quantize and Dequantize nodes
// at the input and output. These Q/DQ ops get lowered to QNN's Quantize and Dequantize operators, which DO NOT
