[QNN EP] Use QNN's ResizeBilinear operator for specific configs of ONNX Resize (#20292)

### Description
Uses QNN's ResizeBilinear operator for ONNX Resize with:
- input rank: 4
- mode: linear
- coordinate transformation mode: half_pixel, align_corners, or asymmetric

#### Mapping matrix of ONNX Resize w/ "linear" mode on HTP backend.
Table entries correspond to the QNN operator used for the given configuration (Resize = QNN Resize op, RBL = QNN ResizeBilinear op, X = Unsupported).

| coordinate_transformation_mode | input_rank < 3 | input_rank = 3 | input_rank = 4 | input_rank = 5 | input_rank > 5 |
| ------------- | ------------- | ------------- | ------------- | ------------- | ------------- |
| half_pixel | X | Resize | RBL | Resize | X |
| pytorch_half_pixel | X | Resize | Resize | Resize | X |
| align_corners | X | Resize | RBL | Resize | X |
| asymmetric | X | Resize | RBL | Resize | X |

### Motivation and Context
QNN's ResizeBilinear operator seems to perform better (lower latency) than QNN's Resize operator for certain configurations.
microsoft · Apr 12, 2024 · 327fb1f · 327fb1f
1 parent 9ca1afa
commit 327fb1f
Show file tree

Hide file tree

Showing 3 changed files with 70 additions and 16 deletions.
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/resize_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/resize_op_builder.cc
@@ -148,7 +148,20 @@ Status ResizeOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
                     "QNN EP: Cannot get shape for Resize input");
   const size_t input_rank = input_shape.size();
 
-  // Validate Resize w/ "nearest" mode.
+  // Resize w/ "linear" mode.
+  // Translation matrix of ONNX Resize w/ "linear" mode on HTP backend.
+  // Table entries correspond to the QNN operator used for the given configuration
+  // (Resize = QNN Resize op, RBL = QNN ResizeBilinear op, X = Unsupported).
+  //
+  //                                                   input rank:
+  // coordinate_transformation_mode: |   < 3      3        4        5        > 5
+  // ---------------------------------------------------------------------------------
+  //                      half_pixel |    X     Resize    RBL     Resize       X
+  //              pytorch_half_pixel |    X     Resize    Resize  Resize       X
+  //                   align_corners |    X     Resize    RBL     Resize       X
+  //                      asymmetric |    X     Resize    RBL     Resize       X
+
+  // Resize w/ "nearest" mode.
   // Translation matrix of ONNX Resize w/ "nearest" mode on HTP backend.
   // Table entries correspond to the QNN operator used for the given configuration
   // (Resize = QNN Resize op, RNN = QNN ResizeNearestNeighbor op, X = Unsupported).
@@ -239,36 +252,74 @@ Status ResizeOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w
   std::vector<std::string> param_tensor_names;
   NodeAttrHelper node_helper(node_unit);
 
+  const auto& input_0 = node_unit.Inputs()[0];
+  std::vector<uint32_t> input_shape;
+  ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(input_0.node_arg, input_shape),
+                    "QNN EP: Cannot get shape for Resize input");
+  const size_t input_rank = input_shape.size();
   const std::string interp_mode = GetOnnxAttr(node_helper, onnx_mode_attr);
   const std::string transformation_mode = GetOnnxAttr(node_helper, onnx_coord_transf_mode_attr);
   const std::string nearest_mode = GetOnnxAttr(node_helper, onnx_nearest_mode_attr);
   const bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType());
   std::string qnn_op_type = "Resize";
 
-  // Translate Resize with {mode: "nearest", nearest_mode: "floor", coordinate_transformation_mode: XXX} to
-  // QNN's ResizeNearestNeighbor operator on the HTP backend. This combination of parameters is not supported on HTP
-  // via QNN's Resize operator. Note that QNN's ResizeNearestNeighbor operator always uses "floor" rounding.
-  if (is_npu_backend && interp_mode == "nearest" && nearest_mode == "floor") {
+  if (is_npu_backend && input_rank == 4 && interp_mode == "nearest" && nearest_mode == "floor") {
+    // Translate Resize with
+    // {input_rank: 4, mode: "nearest", nearest_mode: "floor", coordinate_transformation_mode: XXX} to
+    // QNN's ResizeNearestNeighbor operator on the HTP backend. This combination of parameters is not supported on HTP
+    // via QNN's Resize operator. Note that QNN's ResizeNearestNeighbor operator always uses "floor" rounding.
     qnn_op_type = "ResizeNearestNeighbor";
 
-    // Parameter 'align_corners'
+    // 'align_corners'
     Qnn_Scalar_t qnn_align_corners = QNN_SCALAR_INIT;
     qnn_align_corners.dataType = QNN_DATATYPE_BOOL_8;
     qnn_align_corners.bool8Value = static_cast<uint8_t>(transformation_mode == "align_corners");
+    QnnParamWrapper qnn_align_corners_param(node_unit.Index(), node_unit.Name(),
+                                            QNN_OP_RESIZE_NEAREST_NEIGHBOR_PARAM_ALIGN_CORNERS, qnn_align_corners);
+    param_tensor_names.push_back(qnn_align_corners_param.GetParamTensorName());
+    qnn_model_wrapper.AddParamWrapper(std::move(qnn_align_corners_param));
+
+    // 'half_pixel_centers'
+    Qnn_Scalar_t qnn_half_pixel = QNN_SCALAR_INIT;
+    qnn_half_pixel.dataType = QNN_DATATYPE_BOOL_8;
+    qnn_half_pixel.bool8Value = static_cast<uint8_t>(transformation_mode == "half_pixel");
+    QnnParamWrapper qnn_half_pixel_param(node_unit.Index(), node_unit.Name(),
+                                         QNN_OP_RESIZE_NEAREST_NEIGHBOR_PARAM_HALF_PIXEL_CENTERS, qnn_half_pixel);
+    param_tensor_names.push_back(qnn_half_pixel_param.GetParamTensorName());
+    qnn_model_wrapper.AddParamWrapper(std::move(qnn_half_pixel_param));
+  } else if (is_npu_backend && input_rank == 4 && interp_mode == "linear" &&
+             transformation_mode != "pytorch_half_pixel") {
+    // Translate Resize with
+    // {input_rank: 4, mode: "linear", coordinate_transformation_mode: XXX} to
+    // QNN's ResizeBilinear operator on the HTP backend. QNN ResizeBilinear seems to be faster than QNN Resize on
+    // Windows/HTP QNN SDK 2.19.2.
+    qnn_op_type = "ResizeBilinear";
+
+    // 'align_corners'
+    Qnn_Scalar_t qnn_align_corners = QNN_SCALAR_INIT;
+    qnn_align_corners.dataType = QNN_DATATYPE_BOOL_8;
+    qnn_align_corners.bool8Value = static_cast<uint8_t>(transformation_mode == "align_corners");
+
     QnnParamWrapper qnn_align_corners_param(node_unit.Index(), node_unit.Name(),
                                             QNN_OP_RESIZE_BILINEAR_PARAM_ALIGN_CORNERS, qnn_align_corners);
+
     param_tensor_names.push_back(qnn_align_corners_param.GetParamTensorName());
     qnn_model_wrapper.AddParamWrapper(std::move(qnn_align_corners_param));
 
-    // Parameter 'half_pixel_centers'
+    // 'half_pixel_centers'
     Qnn_Scalar_t qnn_half_pixel = QNN_SCALAR_INIT;
     qnn_half_pixel.dataType = QNN_DATATYPE_BOOL_8;
     qnn_half_pixel.bool8Value = static_cast<uint8_t>(transformation_mode == "half_pixel");
+
     QnnParamWrapper qnn_half_pixel_param(node_unit.Index(), node_unit.Name(),
                                          QNN_OP_RESIZE_BILINEAR_PARAM_HALF_PIXEL_CENTERS, qnn_half_pixel);
+
     param_tensor_names.push_back(qnn_half_pixel_param.GetParamTensorName());
     qnn_model_wrapper.AddParamWrapper(std::move(qnn_half_pixel_param));
   } else {
+    // Fallback to QNN's Resize operator, which seems to align better with ONNX's Resize attributes and supports
+    // input ranks other than 4, but may not perform as optimally (at the moment).
+
     // Parameter 'transformation_mode'
     Qnn_Scalar_t qnn_transformation_mode = QNN_SCALAR_INIT;
     qnn_transformation_mode.dataType = QNN_DATATYPE_UINT_32;

diff --git a/onnxruntime/test/providers/qnn/qnn_basic_test.cc b/onnxruntime/test/providers/qnn/qnn_basic_test.cc
@@ -703,6 +703,7 @@ TEST_F(QnnHTPBackendTests, MultithreadHtpPowerCfgDefaultAndRunOption) {
 
 // Test shape inference of QDQ NHWC Resize operator (opset 18) that uses
 // the sizes input. Use the QNN HTP backend.
+// Maps to QNN's ResizeBilinear operator.
 TEST_F(QnnHTPBackendTests, TestNHWCResizeShapeInference_qdq_sizes_opset18) {
   RunNHWCResizeModel(ORT_MODEL_FOLDER "nhwc_resize_sizes_opset18.quant.onnx", true);
 }

diff --git a/onnxruntime/test/providers/qnn/resize_test.cc b/onnxruntime/test/providers/qnn/resize_test.cc
@@ -177,7 +177,7 @@ static void RunQDQResizeOpTest(const TestInputDef<float>& input_def,
 }
 
 //
-// CPU tests:
+// CPU tests (all map to QNN's Resize on CPU):
 //
 
 // Upsample that uses "round_prefer_floor" as the "nearest_mode".
@@ -324,6 +324,7 @@ TEST_F(QnnCPUBackendTests, DISABLED_Resize_DownSample_Linear_HalfPixel_scales) {
 //
 
 // Test QDQ Resize downsample with mode: "linear", coordinate_transformation_mode: "align_corners"
+// Maps to QNN's ResizeBilinear operator.
 TEST_F(QnnHTPBackendTests, Resize_DownSample_Linear_AlignCorners) {
   std::vector<float> input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};
   RunQDQResizeOpTest<uint8_t>(TestInputDef<float>({1, 1, 2, 4}, false, input_data),
@@ -332,6 +333,7 @@ TEST_F(QnnHTPBackendTests, Resize_DownSample_Linear_AlignCorners) {
 }
 
 // Test QDQ Resize downsample with mode: "linear", coordinate_transformation_mode: "half_pixel"
+// Maps to QNN's ResizeBilinear operator.
 TEST_F(QnnHTPBackendTests, Resize_DownSample_Linear_HalfPixel) {
   std::vector<float> input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};
   RunQDQResizeOpTest<uint8_t>(TestInputDef<float>({1, 1, 2, 4}, false, input_data),
@@ -343,7 +345,7 @@ TEST_F(QnnHTPBackendTests, Resize_DownSample_Linear_HalfPixel) {
 }
 
 // Test 2x QDQ Resize mode: "linear", coordinate_transformation_mode: "pytorch_half_pixel"
-// QNN EP uses QNN's Resize op.
+// Maps to QNN's Resize operator.
 TEST_F(QnnHTPBackendTests, ResizeU8_2xLinearPytorchHalfPixel) {
   std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 48);
   RunQDQResizeOpTest<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, input_data),
@@ -355,7 +357,7 @@ TEST_F(QnnHTPBackendTests, ResizeU8_2xLinearPytorchHalfPixel) {
 }
 
 // Test 2x QDQ Resize mode: "linear", coordinate_transformation_mode: "half_pixel"
-// QNN EP uses QNN's Resize op.
+// Maps to QNN's ResizeBilinear operator.
 TEST_F(QnnHTPBackendTests, ResizeU8_2xLinearHalfPixel) {
   std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 48);
   RunQDQResizeOpTest<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, input_data),
@@ -367,7 +369,7 @@ TEST_F(QnnHTPBackendTests, ResizeU8_2xLinearHalfPixel) {
 }
 
 // Test 2x QDQ Resize mode: "linear", coordinate_transformation_mode: "align_corners"
-// QNN EP uses QNN's Resize op.
+// Maps to QNN's ResizeBilinear operator.
 TEST_F(QnnHTPBackendTests, ResizeU8_2xLinearAlignCorners) {
   std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 48);
   RunQDQResizeOpTest<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, input_data),
@@ -379,7 +381,7 @@ TEST_F(QnnHTPBackendTests, ResizeU8_2xLinearAlignCorners) {
 }
 
 // Test 2x QDQ Resize mode: "linear", coordinate_transformation_mode: "asymmetric"
-// QNN EP uses QNN's Resize op.
+// Maps to QNN's ResizeBilinear operator.
 TEST_F(QnnHTPBackendTests, ResizeU8_2xLinearAsymmetric) {
   std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 48);
   RunQDQResizeOpTest<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, input_data),
@@ -391,7 +393,7 @@ TEST_F(QnnHTPBackendTests, ResizeU8_2xLinearAsymmetric) {
 }
 
 // Test 2x QDQ Resize mode: "nearest", coordinate_transformation_mode: "half_pixel", nearest_mode: "round_prefer_floor"
-// QNN EP uses QNN's Resize op.
+// Maps to QNN's Resize operator.
 TEST_F(QnnHTPBackendTests, ResizeU8_2xNearestHalfPixelRoundPreferFloor) {
   std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 48);
   RunQDQResizeOpTest<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, input_data),
@@ -408,7 +410,7 @@ TEST_F(QnnHTPBackendTests, ResizeU8_NearestModeCeil_Unsupported) {
 }
 
 // Test 3x QDQ Resize mode: "nearest", coordinate_transformation_mode: "asymmetric", nearest_mode: "floor".
-// QNN EP uses QNN's ResizeNearestNeighbor op.
+// Maps to QNN's ResizeNearestNeighbor operator.
 TEST_F(QnnHTPBackendTests, ResizeU8_3xNearestAsymmetricFloor) {
   std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 48);
   RunQDQResizeOpTest<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, input_data),
@@ -417,7 +419,7 @@ TEST_F(QnnHTPBackendTests, ResizeU8_3xNearestAsymmetricFloor) {
 }
 
 // Test 2x QDQ Resize mode: "nearest", coordinate_transformation_mode: "asymmetric", nearest_mode: "round_prefer_floor"
-// QNN EP uses QNN's Resize op.
+// Maps to QNN's Resize operator.
 TEST_F(QnnHTPBackendTests, ResizeU8_2xNearestAsymmetricRoundPreferFloor) {
   std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 8);
   RunQDQResizeOpTest<uint8_t>(TestInputDef<float>({1, 2, 2, 2}, false, input_data),
@@ -447,7 +449,7 @@ TEST_F(QnnHTPBackendTests, DISABLED_ResizeU8_3xNearestAsymmetricRoundPreferFloor
 }
 
 // Test 0.5x QDQ Resize mode: "nearest", coordinate_transformation_mode: "asymmetric", nearest_mode: "floor"
-// QNN EP uses QNN's ResizeNearestNeighbor op.
+// Maps to QNN's ResizeNearestNeighbor operator.
 TEST_F(QnnHTPBackendTests, ResizeU8_HalfNearestAsymmetricFloor) {
   std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 48);
   RunQDQResizeOpTest<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, input_data),