From ba40022ec42a4b60d4b1ef875d6613923e9e8624 Mon Sep 17 00:00:00 2001 From: Wanming Lin Date: Wed, 23 Oct 2024 11:26:34 +0800 Subject: [PATCH] [WebNN EP] Support axes and fix some validation for Resize (#21952) - Supports arbitrary axes for Resize opset 18+ - Check all inputs and attributes more carefully --------- Co-authored-by: Dwayne Robinson --- js/web/docs/webnn-operators.md | 2 +- .../core/providers/webnn/builders/helper.h | 36 +++ .../webnn/builders/impl/resize_op_builder.cc | 287 +++++++++++------- 3 files changed, 216 insertions(+), 109 deletions(-) diff --git a/js/web/docs/webnn-operators.md b/js/web/docs/webnn-operators.md index f696264aeead7..bf0f1dffb83ee 100644 --- a/js/web/docs/webnn-operators.md +++ b/js/web/docs/webnn-operators.md @@ -78,7 +78,7 @@ operators and the supported opset domain/versions in **WebNN EP** by ONNX Runtim | ReduceSumSquare | ai.onnx(7-10, 11-12, 13-17, 18+) | reduceSumSquare | ✓ | ✓ | Input 'axes' if present should be a constant | | Relu | ai.onnx(7-12, 13, 14+) | relu | ✓ | ✓ | | | Reshape | ai.onnx(7-12, 13, 14-18, 19-20, 21+) | reshape | ✓ | ✓ | Input 'shape' should be a constant, 0 dimension value in 'shape' is not supported | -| Resize | ai.onnx(11-12, 13-17, 18, 19+) | resample2d | ✓ | ✓ | Only supports 4-D input, exclude_outside != 0, input 'scales' and 'sizes' if present must be a constant, 'linear' and 'nearest' modes | +| Resize | ai.onnx(11-12, 13-17, 18, 19+) | resample2d | ✓ | ✓ | Only supports 4-D input, antialias == 0, coordinate_transformation_mode == 'half_pixel', exclude_outside == 0, keep_aspect_ratio_policy == 'stretch', 'linear' and 'nearest' modes, input 'scales' and 'sizes' if present must be a constant | | Shape | ai.onnx(7-12, 13-14, 15-18, 19-20, 21+) | slice | ✓ | ✓ | | | Sigmoid | ai.onnx(7-12, 13+) | sigmoid | ✓ | ✓ | | | Softplus | ai.onnx(7+) | softplus | ✓ | ✓ | | diff --git a/onnxruntime/core/providers/webnn/builders/helper.h b/onnxruntime/core/providers/webnn/builders/helper.h index aecb1f7a03bb9..ec9993bf138ba 100644 --- a/onnxruntime/core/providers/webnn/builders/helper.h +++ b/onnxruntime/core/providers/webnn/builders/helper.h @@ -36,6 +36,31 @@ WebnnDeviceType DeviceTypeFromString(const std::string_view& device_type); // Collects all the initializer tensors in the subGraph and its ancestor graphs. InitializedTensorSet CollectAllInitializedTensors(const GraphViewer& graph_viewer); +inline std::vector convertAxesFromNCHWtoNHWC(const std::vector& axes) { + constexpr std::array nchw_to_nhwc = {0, 3, 1, 2}; + std::vector new_axes; + new_axes.reserve(axes.size()); + for (int64_t axis : axes) { + if (axis >= nchw_to_nhwc.size()) { + ORT_THROW("Invalid axis value: ", axis); + } + new_axes.push_back(nchw_to_nhwc[static_cast(axis)]); + } + return new_axes; +} + +inline std::vector HandleNegativeAxes(const std::vector& axes, size_t input_size) { + std::vector new_axes(axes.size()); + for (size_t i = 0; i < axes.size(); ++i) { + new_axes[i] = HandleNegativeAxis(axes[i], input_size); + } + return new_axes; +} + +inline std::vector GetResolvedAxes(const NodeAttrHelper& helper, size_t input_size) { + return HandleNegativeAxes(helper.Get("axes", std::vector{}), input_size); +} + bool GetShape(const NodeArg& node_arg, std::vector& shape, const logging::Logger& logger); template @@ -144,6 +169,17 @@ inline bool ReadScalarTensorData(const onnx::TensorProto& tensor, emscripten::va return true; } +inline bool IsEmptyTensor(const InitializedTensorSet& initializers, const std::string& name) { + if (name.empty() || !Contains(initializers, name)) { + return true; + } + + const auto& tensor = *initializers.at(name); + const auto dims = tensor.dims(); + // An empty tensor contains a 0 in the dimensions list. + return std::any_of(dims.begin(), dims.end(), [](auto d) { return d == 0; }); +} + bool IsInputSupported(const NodeArg& node_arg, const std::string& parent_name, const logging::Logger& logger); // Get a list of groups of supported nodes, each group represents a subgraph supported by WebNN EP. diff --git a/onnxruntime/core/providers/webnn/builders/impl/resize_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/resize_op_builder.cc index 9dc79f4f52f46..3442afbc2b3cd 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/resize_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/resize_op_builder.cc @@ -38,16 +38,33 @@ class ResizeOpBuilder : public BaseOpBuilder { }; // Helper functions -bool GetResizeScales(const InitializedTensorSet& initializers, - const Node& node, std::vector& scales, - const logging::Logger& logger) { +bool GetResizeScalesAndAxes(const InitializedTensorSet& initializers, + const Node& node, std::vector& scales, + std::vector& axes, const bool is_nhwc, + const logging::Logger& logger) { const auto& input_defs = node.InputDefs(); if (input_defs.size() < 3) return false; + const bool has_axes = !axes.empty(); const auto& scales_tensor = *initializers.at(input_defs[2]->Name()); - if (scales_tensor.dims_size() != 1 || scales_tensor.dims()[0] != 4) + if (scales_tensor.dims_size() != 1) { + LOGS(logger, ERROR) << "'scales' should be a 1D tensor."; return false; + } + + // Number of elements of 'scales' tensor. + const auto num_of_scales = scales_tensor.dims()[0]; + + if (has_axes && num_of_scales != 2) { + LOGS(logger, ERROR) << "When 'axes' is provided, 'scales' should have 2 elements."; + return false; + } + + if (!has_axes && num_of_scales != 4) { + LOGS(logger, ERROR) << "When 'axes' is not provided, 'scales' should have 4 elements."; + return false; + } std::vector unpacked_tensor; auto status = onnxruntime::utils::UnpackInitializerData(scales_tensor, unpacked_tensor); @@ -56,20 +73,65 @@ bool GetResizeScales(const InitializedTensorSet& initializers, return false; } const float* scales_data = reinterpret_cast(unpacked_tensor.data()); - scales = std::vector{scales_data, scales_data + 4}; + + if (has_axes) { + // 'axes' is specified since opset 18+, 'scales' should have 2 elements. + scales = std::vector{scales_data, scales_data + 2}; + } else { + // Before opset 18, 'scales' should have 4 elements. + // Make sure 'scales' is not trying to scale on N/C channels here. + std::vector onnx_scales{scales_data, scales_data + 4}; + // 'scales' input has been transposed to NHWC layout if it is NHWC preferred layout. + const float scale_n = onnx_scales[0]; + const float scale_c = is_nhwc ? onnx_scales[3] : onnx_scales[1]; + const float scale_h = is_nhwc ? onnx_scales[1] : onnx_scales[2]; + const float scale_w = is_nhwc ? onnx_scales[2] : onnx_scales[3]; + if (scale_n != 1.0f || scale_c != 1.0f) { + LOGS(logger, VERBOSE) << "Scales of N/C channel should be 1" + << "Scales of N/C channels are not supported" + << ", scale_n, " << scale_n << ", scale_c, " << scale_c; + return false; + } + + scales = {scale_h, scale_w}; + axes = {2, 3}; + } + + if (is_nhwc) { + // For NHWC preferred layout, we need to convert axes from NCHW to NHWC. + axes = convertAxesFromNCHWtoNHWC(axes); + } + return true; } -bool GetResizeOutputSizes(const InitializedTensorSet& initializers, - const Node& node, std::vector& sizes, - const logging::Logger& logger) { +bool GetResizeSizesAndAxes(const InitializedTensorSet& initializers, + const Node& node, std::vector& sizes, + std::vector& axes, const bool is_nhwc, + const gsl::span& input_shape, + const logging::Logger& logger) { const auto& input_defs = node.InputDefs(); if (input_defs.size() < 4) return false; + const bool has_axes = !axes.empty(); const auto& sizes_tensor = *initializers.at(input_defs[3]->Name()); - if (sizes_tensor.dims_size() != 1 || sizes_tensor.dims()[0] != 4) + if (sizes_tensor.dims_size() != 1) { + LOGS(logger, ERROR) << "'sizes' should be a 1D tensor."; + return false; + } + + // Number of elements of sizes tensor. + const auto num_of_sizes = sizes_tensor.dims()[0]; + if (has_axes && num_of_sizes != 2) { + LOGS(logger, ERROR) << "When 'axes' is provided, 'sizes' should have 2 elements."; + return false; + } + + if (!has_axes && num_of_sizes != 4) { + LOGS(logger, ERROR) << "When 'axes' is not provided, 'sizes' should have 4 elements."; return false; + } std::vector unpacked_tensor; auto status = onnxruntime::utils::UnpackInitializerData(sizes_tensor, unpacked_tensor); @@ -78,7 +140,35 @@ bool GetResizeOutputSizes(const InitializedTensorSet& initializers, return false; } const int64_t* sizes_data = reinterpret_cast(unpacked_tensor.data()); - sizes = std::vector{sizes_data, sizes_data + 4}; + + if (has_axes) { + // 'axes' is specified since opset 18+, 'sizes' should have 2 elements. + sizes = std::vector{sizes_data, sizes_data + 2}; + } else { + // Before opset 18, 'sizes' should have 4 elements. + // Make sure 'sizes' is not trying to resize on N/C channels here. + std::vector onnx_sizes{sizes_data, sizes_data + 4}; + auto size_n = onnx_sizes[0]; + const int c_idx = is_nhwc ? 3 : 1; + if (size_n != input_shape[0] || onnx_sizes[c_idx] != input_shape[c_idx]) { + LOGS(logger, VERBOSE) << "Output sizes of N/C chanel should match the input sizes, " + << "Resize of N/C channels are not supported" + << ", input_size_n, " << input_shape[0] << ", output_size_n, " << size_n + << ". input_size_c, " << input_shape[c_idx] << ", output_size_c, " << onnx_sizes[c_idx]; + return false; + } + // 'sizes' input has been transposed to NHWC layout if it is NHWC preferred layout. + const int64_t sizes_h = is_nhwc ? onnx_sizes[1] : onnx_sizes[2]; + const int64_t sizes_w = is_nhwc ? onnx_sizes[2] : onnx_sizes[3]; + sizes = {sizes_h, sizes_w}; + axes = {2, 3}; + } + + if (is_nhwc) { + // For NHWC preferred layout, we need to convert 'axes' from NCHW to NHWC. + axes = convertAxesFromNCHWtoNHWC(axes); + } + return true; } @@ -103,9 +193,15 @@ void ResizeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const N Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const { + const auto& input_defs = node.InputDefs(); + std::vector input_shape; + ORT_RETURN_IF_NOT(GetShape(*input_defs[0], input_shape, logger), "Cannot get shape"); + + const auto& initializers(model_builder.GetInitializerTensors()); + NodeAttrHelper helper(node); + emscripten::val options = emscripten::val::object(); options.set("label", node.Name()); - NodeAttrHelper helper(node); const auto mode = helper.Get("mode", "nearest"); if (mode == "linear") { options.set("mode", emscripten::val("linear")); @@ -113,45 +209,30 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, options.set("mode", emscripten::val("nearest-neighbor")); } - const auto& input_defs = node.InputDefs(); - const auto& initializers(model_builder.GetInitializerTensors()); - std::vector scales; - std::vector sizes; - std::vector scales_hw; - std::vector sizes_hw; - std::vector axes; - std::string scales_name = GetTensorName(input_defs, 2); + std::vector sizes; + std::vector webnn_sizes; + std::vector axes = GetResolvedAxes(helper, 4); // We already checked input shape is 4D in IsOpSupportedImpl. + std::string sizes_name = GetTensorName(input_defs, 3); const bool is_nhwc = model_builder.GetPreferredLayout() == DataLayout::NHWC; - if (!scales_name.empty()) { // Use scales. - ORT_RETURN_IF_NOT(GetResizeScales(initializers, node, scales, logger), "Error getting resize scales"); - if (is_nhwc) { - scales_hw = {scales[1], scales[2]}; - } else { - scales_hw = {scales[2], scales[3]}; - } - options.set("scales", emscripten::val::array(scales_hw)); - } else { // Use sizes, we already checked inputs in IsOpSupportedImpl. - std::vector output_sizes; - ORT_RETURN_IF_NOT(GetResizeOutputSizes(initializers, node, output_sizes, logger), - "Error getting resize output_sizes"); - std::transform(output_sizes.cbegin(), output_sizes.cend(), - std::back_inserter(sizes), - [](int64_t dim) -> int32_t { return SafeInt(dim); }); - if (is_nhwc) { - sizes_hw = {sizes[1], sizes[2]}; - } else { - sizes_hw = {sizes[2], sizes[3]}; - } - options.set("sizes", emscripten::val::array(sizes_hw)); - } - if (is_nhwc) { - axes = {1, 2}; + // We know we have either a 'scales' or 'sizes' input so this is safe. + // Check for 'sizes' first. + // This handles Resize-11 where 'scales' was a required input but 'sizes' were used if provided. + bool using_sizes = !sizes_name.empty() && Contains(initializers, sizes_name); + if (using_sizes) { + ORT_RETURN_IF_NOT(GetResizeSizesAndAxes(initializers, node, sizes, axes, is_nhwc, input_shape, logger), + "Error getting Resize sizes"); + webnn_sizes = GetVecUint32FromVecInt64(sizes); + options.set("sizes", emscripten::val::array(webnn_sizes)); } else { - axes = {2, 3}; + ORT_RETURN_IF_NOT(GetResizeScalesAndAxes(initializers, node, scales, axes, is_nhwc, logger), + "Error getting Resize scales"); + options.set("scales", emscripten::val::array(scales)); } - options.set("axes", emscripten::val::array(axes)); + + std::vector webnn_axes = GetVecUint32FromVecInt64(axes); + options.set("axes", emscripten::val::array(webnn_axes)); emscripten::val input = model_builder.GetOperand(input_defs[0]->Name()); emscripten::val output = model_builder.GetBuilder().call("resample2d", input, options); @@ -166,6 +247,7 @@ bool ResizeOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers const WebnnDeviceType /* device_type */, const logging::Logger& logger) const { const auto& input_defs = node.InputDefs(); + NodeAttrHelper helper(node); std::vector input_shape; if (!GetShape(*input_defs[0], input_shape, logger)) @@ -179,92 +261,81 @@ bool ResizeOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers } { // Check attributes. - NodeAttrHelper helper(node); - const auto mode = helper.Get("mode", "nearest"); - bool is_linear_resize = mode == "linear"; - bool is_nearest_resize = mode == "nearest"; - // WebNN only supports "linear" and "nearest" modes. - if (!is_linear_resize && !is_nearest_resize) { - LOGS(logger, VERBOSE) << "Resize does not support input mode: " << mode; + // antialias + if (helper.Get("antialias", 0) != 0) { + LOGS(logger, VERBOSE) << "Resize does not support antialias"; return false; } - const auto exclude_outside = helper.Get("exclude_outside", 0); - if (exclude_outside != 0) { - LOGS(logger, VERBOSE) << "Resize does not support exclude_outside for now"; + // coordinate_transformation_mode + // Spec issue for supporting more coordinate transformation modes: + // https://github.com/webmachinelearning/webnn/issues/270 + const std::string coordinate_transformation_mode = helper.Get("coordinate_transformation_mode", "half_pixel"); + if (coordinate_transformation_mode != "half_pixel") { + LOGS(logger, VERBOSE) << "Resize does not support coordinate_transformation_mode: " + << coordinate_transformation_mode; return false; } - } - { // scales and sizes (if present) must be initializers. - const std::string scales_name = GetTensorName(input_defs, 2); - const std::string sizes_name = GetTensorName(input_defs, 3); - - // scales (scales may be empty tensor) - bool has_scales = !scales_name.empty(); - if ((has_scales && !Contains(initializers, scales_name)) || (!has_scales && node.SinceVersion() == 11)) { - LOGS(logger, VERBOSE) << "Input scales of Resize must be known"; + // exclude_outside + const auto exclude_outside = helper.Get("exclude_outside", 0); + if (exclude_outside != 0) { + LOGS(logger, VERBOSE) << "Resize does not support exclude_outside for now"; return false; } - // sizes (sizes may be empty tensor) - bool has_sizes = !sizes_name.empty(); - if (has_sizes && !Contains(initializers, sizes_name)) { - LOGS(logger, VERBOSE) << "Input sizes of Resize must be known"; + // keep_aspect_ratio_policy + const auto keep_aspect_ratio_policy = helper.Get("keep_aspect_ratio_policy", "stretch"); + if (keep_aspect_ratio_policy != "stretch") { + LOGS(logger, VERBOSE) << "Resize does not support keep_aspect_ratio_policy: " << keep_aspect_ratio_policy; return false; } - if (has_scales && has_sizes) { - LOGS(logger, VERBOSE) << "Only one of 'scales' and 'sizes' can be specified"; + // mode + const auto mode = helper.Get("mode", "nearest"); + bool is_linear_resize = mode == "linear"; + bool is_nearest_resize = mode == "nearest"; + // WebNN only supports "linear" and "nearest" modes. + if (!is_linear_resize && !is_nearest_resize) { + LOGS(logger, VERBOSE) << "Resize does not support input mode: " << mode; return false; } + } - const bool is_nhwc = node.Domain() == kMSInternalNHWCDomain; - // We want to check if the scales or sizes are not trying to resize on N/C channels here. - if (has_scales) { // We are using scales. - std::vector scales; - if (!GetResizeScales(initializers, node, scales, logger)) - return false; - - float scale_n = scales[0]; - float scale_c = is_nhwc ? scales[3] : scales[1]; - if (scale_n != 1.0f || scale_c != 1.0f) { - LOGS(logger, VERBOSE) << "Scales of N/C channel should be 1" - << "Resize of N/C channels are not supported" - << ", scale_n, " << scale_n << ", scale_c, " << scale_c; - return false; - } + { // 'scales' and 'sizes' (if present) must be non-empty initializers. + const std::string scales_name = GetTensorName(input_defs, 2); + const std::string sizes_name = GetTensorName(input_defs, 3); - // For now we only support upscale, so the scale_h and scale_w should be an integer >= 1. - // TODO support ResizeBilinear. - float scale_h = is_nhwc ? scales[1] : scales[2]; - float scale_w = is_nhwc ? scales[2] : scales[3]; + // Check for 'sizes' first. + // This handles Resize-11 where 'scales' was a required input but 'sizes' were used if provided. + // 'scales' or 'sizes' may be empty tensor. + bool using_sizes = !IsEmptyTensor(initializers, sizes_name); + bool using_scales = !using_sizes && !IsEmptyTensor(initializers, scales_name); - // Onnx spec requires scale to be a positive float, so we are not checking that here. - if (roundf(scale_h) != scale_h) { - LOGS(logger, VERBOSE) << "Resize: scale_h: " << scale_h << " is not a whole number"; - return false; - } + if (!using_scales && !using_sizes) { + LOGS(logger, VERBOSE) << "Resize: only one of 'scales' and 'sizes' can be specified"; + return false; + } - if (roundf(scale_w) != scale_w) { - LOGS(logger, VERBOSE) << "Resize: scale_w: " << scale_w << " is not a whole number"; + // 'axes' is from opset 18 on and allows 'scales' or 'sizes' to have entries for the subset of 'axes'. + // We fill with default values if necessary so that the processing is consistent across all supported opsets. + std::vector axes = GetResolvedAxes(helper, input_size); + if (!axes.empty()) { // We have 'axes' attribute. + if (axes.size() != 2 || axes[0] >= input_size || axes[1] >= input_size) { + LOGS(logger, VERBOSE) << "Resize: invalid axes attribute"; return false; } } - if (has_sizes) { - // We are using sizes. - std::vector output_sizes; - if (!GetResizeOutputSizes(initializers, node, output_sizes, logger)) + const bool is_nhwc = node.Domain() == kMSInternalNHWCDomain; + if (using_sizes) { // We are using 'sizes'. + std::vector sizes; + if (!GetResizeSizesAndAxes(initializers, node, sizes, axes, is_nhwc, input_shape, logger)) { return false; - - auto output_size_n = output_sizes[0]; - const int c_idx = is_nhwc ? 3 : 1; - if (output_size_n != input_shape[0] || output_sizes[c_idx] != input_shape[c_idx]) { - LOGS(logger, VERBOSE) << "Output sizes of N/C chanel should match the input sizes, " - << "Resize of N/C channels are not supported" - << ", input_size_n, " << input_shape[0] << ", output_size_n, " << output_size_n - << ". input_size_c, " << input_shape[c_idx] << ", output_size_c, " << output_sizes[c_idx]; + } + } else { // We are using 'scales'. + std::vector scales; + if (!GetResizeScalesAndAxes(initializers, node, scales, axes, is_nhwc, logger)) { return false; } }