diff --git a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc index ebb3f97895f06..981d96336b38b 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc @@ -314,6 +314,18 @@ void AddOperationInput(MILSpec::Operation& op, std::string_view input_name, std: (*op.mutable_inputs())[input_name] = std::move(arg); } +void AddIntermediateOperationOutput(COREML_SPEC::MILSpec::Operation& op, const std::string& output_name, + int32_t element_type, std::optional> shape) { + auto& outputs = *op.mutable_outputs(); + auto& output_arg = *outputs.Add(); + output_arg.set_name(output_name); + + MILSpec::ValueType& value = *output_arg.mutable_type(); + MILSpec::TensorType& tensor_type = *value.mutable_tensortype(); + + SetTensorTypeInfo(tensor_type, OnnxDataTypeToMILSpec(element_type), shape, /*convert_scalar*/ true); +} + void AddOperationOutput(COREML_SPEC::MILSpec::Operation& op, const NodeArg& output, std::optional override_element_type) { auto& outputs = *op.mutable_outputs(); diff --git a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h index f012e6af0d718..7ff1e6b8dbf26 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h +++ b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h @@ -130,6 +130,17 @@ void AddOperationInput(COREML_SPEC::MILSpec::Operation& op, std::string_view input_name, std::string_view value_name); /// +/// Add an output to a MILSpec::Operation for an intermediate operation when the implementation is composed of +/// multiple MLProgram operations. In this case we don't have a NodeArg for the output. +/// +/// Operation to update. +/// Name of the intermediate output. Create using ModelBuilder::GetUniqueName. +/// onnx::TensorProto_DataType element type of the output. 
+/// int32_t as that is what TensorShapeProto uses to store the value. +/// Shape of the output if known. +void AddIntermediateOperationOutput(COREML_SPEC::MILSpec::Operation& op, const std::string& output_name, + int32_t element_type, std::optional> shape); +/// /// Add an output to a MILSpec::Operation. Name, data type and shape are used from the NodeArg. /// /// Operation to update. diff --git a/onnxruntime/core/providers/coreml/builders/impl/depthtospace_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/depthtospace_op_builder.cc index 1eba312b2577b..bec2461ffbc52 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/depthtospace_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/depthtospace_op_builder.cc @@ -4,6 +4,7 @@ #include "core/common/safeint.h" #include "core/providers/coreml/builders/helper.h" #include "core/providers/coreml/builders/impl/base_op_builder.h" +#include "core/providers/coreml/builders/impl/builder_utils.h" #include "core/providers/coreml/builders/model_builder.h" #include "core/providers/coreml/builders/op_builder_factory.h" #include "core/providers/coreml/shape_utils.h" @@ -18,52 +19,133 @@ class DepthToSpaceOpBuilder : public BaseOpBuilder { bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const override; + + bool SupportsMLProgram() const override { return true; } }; Status DepthToSpaceOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, - const logging::Logger& /* logger */) const { - std::unique_ptr layer = model_builder.CreateNNLayer(node); - + [[maybe_unused]] const logging::Logger& logger) const { const auto& input_defs = node.InputDefs(); const auto& output_defs = node.OutputDefs(); const auto& input_name = input_defs[0]->Name(); - const auto& output_name = output_defs[0]->Name(); - uint64_t blocksize = SafeInt(node.GetAttributes().at("blocksize").i()); + NodeAttrHelper helper(node); + int64_t 
blocksize = *helper.GetInt64("blocksize"); // required attribute + +#if defined(COREML_ENABLE_MLPROGRAM) + if (model_builder.CreateMLProgram()) { + using namespace CoreML::Specification::MILSpec; // NOLINT + + const auto mode = helper.Get("mode", "DCR"); + + if (mode == "DCR") { + // DCR is directly supported + // https://apple.github.io/coremltools/source/coremltools.converters.mil.mil.ops.defs.html#coremltools.converters.mil.mil.ops.defs.iOS15.tensor_transformation.depth_to_space + // Validated with depth_to_space.py. + auto op = model_builder.CreateOperation(node, "depth_to_space"); + AddOperationInput(*op, "x", input_name); + AddOperationInput(*op, "block_size", model_builder.AddScalarConstant(op->type(), "blocksize", blocksize)); + AddOperationOutput(*op, *output_defs[0]); + model_builder.AddOperation(std::move(op)); + } else { + // CRD is manual. there may be a perf cost from the Reshape's (typically that happens on CPU) but if the input + // is a fixed size hopefully CoreML is smart enough to handle that aspect during model compilation instead + // of execution. + + // https://github.com/onnx/onnx/blob/main/docs/Operators.md#depthtospace + // b, c, h, w = x.shape + // tmp = np.reshape(x, [b, c // (blocksize ** 2), blocksize, blocksize, h, w]) + // tmp = np.transpose(tmp, [0, 1, 4, 2, 5, 3]) + // y = np.reshape(tmp, [b, c // (blocksize ** 2), h * blocksize, w * blocksize]) + // + // CoreML has a 5D limit, so we merge the batch dim into the channel dim as that doesn't change the data + // movement. 
+ // First reshape is to [b * c // (blocksize ** 2), blocksize, blocksize, h, w] + // Transpose is to [0, 3, 1, 4, 2] + + // we checked shape was static in IsOpSupportedImpl so this should never fail + std::vector input_shape; + ORT_RETURN_IF_NOT(GetStaticShape(*input_defs[0], input_shape, logger), "Failed to get input shape"); + const int32_t elem_type = static_cast(ONNX_NAMESPACE::TensorProto_DataType_FLOAT); + + // reshape to [b * c // (blocksize ** 2), blocksize, blocksize, h, w] + auto reshape1 = model_builder.CreateOperation(node, "reshape", "pre"); + std::vector shape1 = {input_shape[0] * input_shape[1] / (blocksize * blocksize), + blocksize, blocksize, input_shape[2], input_shape[3]}; + AddOperationInput(*reshape1, "x", input_name); + AddOperationInput(*reshape1, "shape", model_builder.AddConstant(reshape1->type(), "shape", shape1)); + const auto& reshape1_output = model_builder.GetUniqueName(node, "reshape1"); + AddIntermediateOperationOutput(*reshape1, reshape1_output, elem_type, shape1); + + // transpose to [0, 3, 1, 4, 2] + auto transpose = model_builder.CreateOperation(node, "transpose"); + std::vector perm = {0, 3, 1, 4, 2}; + std::vector shape2 = {shape1[0], shape1[3], shape1[1], shape1[4], shape1[2]}; + AddOperationInput(*transpose, "x", reshape1_output); + AddOperationInput(*transpose, "perm", model_builder.AddConstant(transpose->type(), "perm", perm)); + const auto& transpose_output = model_builder.GetUniqueName(node, "transpose"); + AddIntermediateOperationOutput(*transpose, transpose_output, elem_type, shape2); + + // reshape to [b, c // (blocksize ** 2), h * blocksize, w * blocksize] + auto reshape2 = model_builder.CreateOperation(node, "reshape", "post"); + std::vector shape3 = {input_shape[0], + input_shape[1] / (blocksize * blocksize), + input_shape[2] * blocksize, + input_shape[3] * blocksize}; + AddOperationInput(*reshape2, "x", transpose_output); + AddOperationInput(*reshape2, "shape", model_builder.AddConstant(reshape2->type(), "shape", 
shape3)); + + AddOperationOutput(*reshape2, *output_defs[0]); + + model_builder.AddOperation(std::move(reshape1)); + model_builder.AddOperation(std::move(transpose)); + model_builder.AddOperation(std::move(reshape2)); + } + } else // NOLINT +#endif // if defined(COREML_ENABLE_MLPROGRAM) + { + const auto& output_name = output_defs[0]->Name(); + std::unique_ptr layer = model_builder.CreateNNLayer(node); - auto* coreml_depthtospace = layer->mutable_reorganizedata(); - coreml_depthtospace->set_blocksize(blocksize); - coreml_depthtospace->set_mode(CoreML::Specification::ReorganizeDataLayerParams_ReorganizationType:: - ReorganizeDataLayerParams_ReorganizationType_DEPTH_TO_SPACE); + auto* coreml_depthtospace = layer->mutable_reorganizedata(); + coreml_depthtospace->set_blocksize(static_cast(blocksize)); + coreml_depthtospace->set_mode(CoreML::Specification::ReorganizeDataLayerParams_ReorganizationType:: + ReorganizeDataLayerParams_ReorganizationType_DEPTH_TO_SPACE); - *layer->mutable_input()->Add() = input_name; - *layer->mutable_output()->Add() = output_name; + *layer->mutable_input()->Add() = input_name; + *layer->mutable_output()->Add() = output_name; + + model_builder.AddLayer(std::move(layer)); + } - model_builder.AddLayer(std::move(layer)); return Status::OK(); } -bool DepthToSpaceOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& /*input_params*/, +bool DepthToSpaceOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const { const auto& input_defs = node.InputDefs(); std::vector input_shape; if (!GetShape(*input_defs[0], input_shape, logger)) { + LOGS(logger, VERBOSE) << "DepthToSpace: no input shape"; return false; } - const auto input_rank = input_shape.size(); - if (input_rank < 4) { - LOGS(logger, VERBOSE) << "DepthToSpace does not support input shape of " << input_rank << "d shape."; - } + // ONNX and CoreML both require 4D input so no need to check the shape here. 
NodeAttrHelper helper(node); - if (node.SinceVersion() >= 11) { - // For now, only DCR mode DepthToSpace is supported - const auto mode = helper.Get("mode", "DCR"); + const auto mode = helper.Get("mode", "DCR"); + + if (input_params.create_mlprogram) { + if (mode == "CRD" && !IsStaticShape(input_shape)) { + // we need to manually implement the logic with a Reshape, so we need to know the shape to do that + LOGS(logger, VERBOSE) << "DepthToSpace: CRD mode requires static shape"; + return false; + } + } else { if (mode != "DCR") { - LOGS(logger, VERBOSE) << "The mode: " << mode << "of DepthToSpace is not supported in CoreML EP for now."; + LOGS(logger, VERBOSE) << "DepthToSpace: " << mode << " mode is not supported"; return false; } } diff --git a/onnxruntime/test/providers/cpu/tensor/space_depth_ops_test.cc b/onnxruntime/test/providers/cpu/tensor/space_depth_ops_test.cc index 5222380d9ca56..a0c1d675f506f 100644 --- a/onnxruntime/test/providers/cpu/tensor/space_depth_ops_test.cc +++ b/onnxruntime/test/providers/cpu/tensor/space_depth_ops_test.cc @@ -373,5 +373,36 @@ TEST(TensorOpTest, DepthToSpaceTest_5) { test.Run(); } +TEST(TensorOpTest, DepthToSpaceTest_CRD_Batched) { + OpTester test("DepthToSpace", 11); // create an opset 11 model with attribute present = "CRD" mode + constexpr int64_t blocksize = 2; + test.AddAttribute("blocksize", blocksize); + test.AddAttribute("mode", "CRD"); + + constexpr int64_t N = 2, C = 4, H = 2, W = 3; + std::vector X = {0., 1., 2., + 3., 4., 5., + 9., 10., 11., + 12., 13., 14., + 18., 19., 20., + 21., 22., 23., + 27., 28., 29., + 30., 31., 32.}; + + // append same data but in reverse order so we can tell if the batch output is wrong + X.insert(X.end(), X.rbegin(), X.rend()); + + test.AddInput("input", {N, C, H, W}, X); + + std::vector result = {0., 9., 1., 10., 2., 11., + 18., 27., 19., 28., 20., 29., + 3., 12., 4., 13., 5., 14., + 21., 30., 22., 31., 23., 32.}; + result.insert(result.end(), result.rbegin(), result.rend()); + + 
test.AddOutput<float>("output", {2, 1, 4, 6}, result);
+  test.Run();
+}
+
 }  // namespace test
 }  // namespace onnxruntime
diff --git a/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md b/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md
index c33184686c932..322e4eed5f9c8 100644
--- a/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md
+++ b/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md
@@ -7,6 +7,7 @@ Keep in sync with doco generated from /docs/execution-providers/CoreML-Execution
 |ai.onnx:AveragePool|Only 2D Pool is supported currently. 3D and 5D support can be added if needed.|
 |ai.onnx:Clip||
 |ai.onnx:Conv|Only 1D/2D Conv is supported.
Bias if provided must be constant.|
+|ai.onnx:DepthToSpace|If 'mode' is 'CRD' the input must have a fixed shape.|
 |ai.onnx:Div||
 |ai.onnx:Gemm|Input B must be constant.|
 |ai.onnx:GlobalAveragePool|Only 2D Pool is supported currently. 3D and 5D support can be added if needed.|