Skip to content

Commit

Permalink
[WebNN EP] ConvTranspose should calculate the pads or output shape
Browse files Browse the repository at this point in the history
This PR adds the missing pads and output shape calculation for ConvTranspose.

Per ONNX spec:
- If the output shape is explicitly provided, compute the pads.
- Otherwise compute the output shape, as well as the pads if the
  auto_pad attribute is SAME_UPPER/SAME_LOWER.
  • Loading branch information
Honry committed Jul 19, 2024
1 parent 9140d9b commit 9f59690
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 56 deletions.
88 changes: 88 additions & 0 deletions onnxruntime/core/providers/webnn/builders/impl/builder_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,5 +68,93 @@ common::Status HandleAutoPad(const std::vector<int64_t> input_shape,
return Status::OK();
}

// Computes the head/tail padding and (when not explicitly provided) the output
// size for one spatial dimension of a ConvTranspose, per the ONNX spec:
// - out_size != -1: the output size is caller-provided, so only the pads are
//   derived from it.
// - out_size == -1: the pads are first derived when auto_pad is
//   SAME_UPPER/SAME_LOWER (targeting out_size == in_size * stride), then the
//   output size is computed from the standard ConvTranspose formula.
common::Status ComputeConvTransposePadAndOutputShape(
    const int64_t in_size,
    const int64_t stride,
    const int64_t kernel,
    const int64_t dilation,
    const int64_t adj,
    AutoPadType pad_type,
    int64_t& pad_head,
    int64_t& pad_tail,
    int64_t& out_size) {
  // Explicit output size provided: only the pad values need computing.
  if (out_size != -1) {
    const auto total_padding = ComputeTotalPad(in_size, stride, adj, kernel, dilation, out_size);
    DistributePadding(pad_type, total_padding, pad_head, pad_tail);
    return Status::OK();
  }

  // Output size not provided: derive pads first for SAME_UPPER/SAME_LOWER.
  // The ONNX spec says when `auto_pad` is set, pad until the output size
  // equals `in_size * stride`.
  const bool same_pad = (pad_type == AutoPadType::SAME_UPPER) || (pad_type == AutoPadType::SAME_LOWER);
  if (same_pad) {
    const auto total_padding = ComputeTotalPad(in_size, stride, adj, kernel, dilation,
                                               /*out_size=*/in_size * stride);
    DistributePadding(pad_type, total_padding, pad_head, pad_tail);
  }

  // Standard ConvTranspose output-size formula.
  out_size = (in_size - 1) * stride + adj + (kernel - 1) * dilation + 1 - pad_head - pad_tail;

  return Status::OK();
}

// Computes the 2-D pads and output shape for ConvTranspose by applying the
// per-dimension helper to the H (y) and W (x) axes.
// - input_shape: full input tensor shape; H/W live at dims {2,3} for NCHW
//   and {1,2} for NHWC.
// - onnx_pads layout: {top, left, bottom, right}; pads_out uses the same order.
// - output_shape_out: in/out — carries the caller-provided {H, W} (-1 when not
//   explicitly set) and receives the final {H, W}.
common::Status ComputeConvTransposePadsAndOutputShape(const std::vector<int64_t> input_shape,
                                                      const int64_t weight_size_y,
                                                      const int64_t weight_size_x,
                                                      const std::vector<int64_t>& onnx_pads,
                                                      const std::vector<int64_t>& onnx_strides,
                                                      const std::vector<int64_t>& onnx_dilations,
                                                      const std::vector<int64_t>& onnx_output_padding,
                                                      AutoPadType auto_pad_type,
                                                      std::vector<int64_t>& pads_out,
                                                      std::vector<int64_t>& output_shape_out,
                                                      bool use_nchw) {
  // Spatial dims start at index 2 for NCHW, index 1 for NHWC.
  const size_t spatial_start = use_nchw ? 2 : 1;
  const int64_t in_y = input_shape[spatial_start];
  const int64_t in_x = input_shape[spatial_start + 1];

  // ONNX pads are ordered {top, left, bottom, right}.
  int64_t pad_top = onnx_pads[0];
  int64_t pad_left = onnx_pads[1];
  int64_t pad_bottom = onnx_pads[2];
  int64_t pad_right = onnx_pads[3];

  int64_t out_y = output_shape_out[0];
  int64_t out_x = output_shape_out[1];

  // Height (y) axis.
  ORT_RETURN_IF_ERROR(ComputeConvTransposePadAndOutputShape(
      in_y, onnx_strides[0], weight_size_y, onnx_dilations[0], onnx_output_padding[0],
      auto_pad_type, pad_top, pad_bottom, out_y));
  // Width (x) axis.
  ORT_RETURN_IF_ERROR(ComputeConvTransposePadAndOutputShape(
      in_x, onnx_strides[1], weight_size_x, onnx_dilations[1], onnx_output_padding[1],
      auto_pad_type, pad_left, pad_right, out_x));

  // WebNN only needs the height and width of the output shape.
  output_shape_out = {out_y, out_x};
  pads_out = {pad_top, pad_left, pad_bottom, pad_right};

  return Status::OK();
}

} // namespace webnn
} // namespace onnxruntime
13 changes: 13 additions & 0 deletions onnxruntime/core/providers/webnn/builders/impl/builder_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,18 @@ common::Status HandleAutoPad(const std::vector<int64_t> input_shape,
std::vector<int64_t>& pads_out,
bool use_nchw) ORT_MUST_USE_RESULT;

// Compute pads and output shape for ConvTranspose.
common::Status ComputeConvTransposePadsAndOutputShape(const std::vector<int64_t> input_shape,
const int64_t weight_size_y,
const int64_t weight_size_x,
const std::vector<int64_t>& onnx_pads,
const std::vector<int64_t>& onnx_strides,
const std::vector<int64_t>& onnx_dilations,
const std::vector<int64_t>& onnx_output_padding,
AutoPadType auto_pad_type,
std::vector<int64_t>& pads_out,
std::vector<int64_t>& output_shape_out,

Check warning on line 37 in onnxruntime/core/providers/webnn/builders/impl/builder_utils.h — GitHub Actions / Lint C++: [cpplint] reported by reviewdog 🐶 Add #include &lt;vector&gt; for vector&lt;&gt; [build/include_what_you_use] [4]
bool use_nchw) ORT_MUST_USE_RESULT;

} // namespace webnn
} // namespace onnxruntime
81 changes: 25 additions & 56 deletions onnxruntime/core/providers/webnn/builders/impl/conv_op_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,72 +56,41 @@ common::Status SetConvBaseOptions(ModelBuilder& model_builder,

// Add Padding.
AutoPadType auto_pad_type = StringToAutoPadType(helper.Get("auto_pad", "NOTSET"));
if (node.OpType() == "Conv") {
std::vector<int64_t> pads_out;
if (node.OpType() == "Conv" || node.OpType() == "ConvInteger") {
// Calculate explicit padding for autoPad.
if (AutoPadType::SAME_UPPER == auto_pad_type || AutoPadType::SAME_LOWER == auto_pad_type) {
std::vector<int64_t> pads_out;
ORT_RETURN_IF_ERROR(HandleAutoPad(input_shape, weight_shape[2], weight_shape[3],
pads, strides, dilations, auto_pad_type, pads_out, !is_nhwc));
pads = pads_out;
}
} else if (node.OpType() == "ConvTranspose") {
// When the 'output_shape' is specified, the 'output_padding' values
// in options.outputPadding are ignored.
std::vector<int64_t> dims;
std::vector<int64_t> output_padding{0, 0};
if (helper.HasAttr("output_shape")) {
// Default value of 'output_shape' will be ignored as we already check if it existed.
dims = helper.Get("output_shape", std::vector<int64_t>{-1, -1});
// Extract the height and width.
std::vector<int64_t> output_shape;
if (dims.size() == 1 && is_conv1d) { // ConvTranspose 1d
output_shape = {dims[0], 1};
} else if (dims.size() == 2 && !is_conv1d) {
output_shape = dims;
} else {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Invalid output shape");
}
// Padding values are auto generated.
if (helper.HasAttr("kernel_shape")) {
std::vector<int64_t> kernel_shape = helper.Get("kernel_shape", std::vector<int64_t>{-1, -1});
if (is_conv1d) { // ConvTranspose 1d
kernel_shape.push_back(1);
}
std::vector<int64_t> total_padding(2);
for (size_t i = 0; i < 2; i++) {
// Get the dimensions of H and W.
// For NHWC layout, the dimensions of H and W correspond to index 1 and 2.
// For NCHW layout, the dimensions of H and W correspond to index 2 and 3.
if (is_nhwc) {
total_padding[i] = strides[i] * (input_shape[i + 1] - 1) + output_padding[i] +
((kernel_shape[i] - 1) * dilations[i] + 1) - output_shape[i];
} else {
total_padding[i] = strides[i] * (input_shape[i + 2] - 1) + output_padding[i] +
((kernel_shape[i] - 1) * dilations[i] + 1) - output_shape[i];
}
}
AutoPadType auto_pad_type = StringToAutoPadType(helper.Get("auto_pad", "NOTSET"));
if (AutoPadType::SAME_UPPER == auto_pad_type || AutoPadType::SAME_LOWER == auto_pad_type) {
pads[0] = total_padding[0] / 2;
pads[1] = total_padding[0] - pads[0];
pads[2] = total_padding[1] / 2;
pads[3] = total_padding[1] - pads[2];
if (AutoPadType::SAME_LOWER == auto_pad_type) {
std::swap(pads[0], pads[1]);
std::swap(pads[2], pads[3]);
}
}
}
std::vector<int64_t> output_shape = helper.Get("output_shape", std::vector<int64_t>{-1, -1});
// Appending 1's if it is ConvTranspose 1d and output shape is provided.
if (output_shape.size() == 1 && is_conv1d && output_shape[0] != -1) {
output_shape.push_back(1);
}

std::vector<int64_t> output_padding = helper.Get("output_padding", std::vector<int64_t>{0, 0});
// Appending 0's if it is ConvTranspose 1d.
if (output_padding.size() == 1 && is_conv1d) {
output_padding.push_back(0);
}
options.set("outputPadding", emscripten::val::array(GetVecUint32FromVecInt64(output_padding)));

// If output shape is explicitly provided, compute the pads.
// Otherwise compute the output shape, as well as the pads if the auto_pad attribute is SAME_UPPER/SAME_LOWER.
ORT_RETURN_IF_ERROR(ComputeConvTransposePadsAndOutputShape(input_shape, weight_shape[2], weight_shape[3],
pads, strides, dilations, output_padding,
auto_pad_type, pads_out, output_shape, !is_nhwc));

if (output_shape[0] != -1 && output_shape[1] != -1) {
options.set("outputSizes", emscripten::val::array(GetVecUint32FromVecInt64(output_shape)));
} else {
output_padding = helper.Get("output_padding", std::vector<int64_t>{0, 0});
if (output_padding.size() == 1 && is_conv1d) { // ConvTranspose 1d
output_padding.push_back(0);
}
options.set("outputPadding", emscripten::val::array(GetVecUint32FromVecInt64(output_padding)));
}
pads = pads_out;
} else {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "conv_op_builder only supports Op Conv and ConvTranspose.");
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"conv_op_builder only supports Op Conv, ConvInteger and ConvTranspose.");
}

const auto group = helper.Get("group", static_cast<uint32_t>(1));
Expand Down

0 comments on commit 9f59690

Please sign in to comment.