Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CoreML: ML Program DepthToSpace #21426

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,18 @@ void AddOperationInput(MILSpec::Operation& op, std::string_view input_name, std:
(*op.mutable_inputs())[input_name] = std::move(arg);
}

// Appends a named tensor output to `op` for a value produced between the MLProgram operations that together
// implement a single ONNX node. `element_type` is an ONNX element type value that is converted to the MILSpec
// data type; `shape` is forwarded unchanged and may be std::nullopt.
void AddIntermediateOperationOutput(COREML_SPEC::MILSpec::Operation& op, const std::string& output_name,
                                    int32_t element_type, std::optional<gsl::span<const int64_t>> shape) {
  // create the new output entry and give it the caller supplied name
  auto* new_output = op.mutable_outputs()->Add();
  new_output->set_name(output_name);

  // the output's value type is a tensor type; fill in its data type and shape
  MILSpec::ValueType* output_value_type = new_output->mutable_type();
  MILSpec::TensorType* output_tensor_type = output_value_type->mutable_tensortype();

  const auto mil_data_type = OnnxDataTypeToMILSpec(element_type);
  SetTensorTypeInfo(*output_tensor_type, mil_data_type, shape, /*convert_scalar*/ true);
}

void AddOperationOutput(COREML_SPEC::MILSpec::Operation& op, const NodeArg& output,
std::optional<int32_t> override_element_type) {
auto& outputs = *op.mutable_outputs();
Expand Down
11 changes: 11 additions & 0 deletions onnxruntime/core/providers/coreml/builders/impl/builder_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,17 @@
std::string_view input_name, std::string_view value_name);

/// <summary>
/// Add an output to a MILSpec::Operation for an intermediate operation when the implementation is composed of
/// multiple MLProgram operations. In this case we don't have a NodeArg for the output.
/// </summary>
/// <param name="op">Operation to update.</param>
/// <param name="output_name">Name of the intermediate output. Create using ModelBuilder::GetUniqueName.</param>
/// <param name="element_type">onnx::TensorProto_DataType element type of the output.
/// int32_t as that is what TypeProto_Tensor uses to store the value (elem_type).</param>
/// <param name="shape">Shape of the output if known.</param>
void AddIntermediateOperationOutput(COREML_SPEC::MILSpec::Operation& op, const std::string& output_name,

Check warning on line 141 in onnxruntime/core/providers/coreml/builders/impl/builder_utils.h

View workflow job for this annotation

GitHub Actions / Lint C++

[cpplint] reported by reviewdog 🐶 Add #include <string> for string [build/include_what_you_use] [4] Raw Output: onnxruntime/core/providers/coreml/builders/impl/builder_utils.h:141: Add #include <string> for string [build/include_what_you_use] [4]
int32_t element_type, std::optional<gsl::span<const int64_t>> shape);
/// <summary>
/// Add an output to a MILSpec::Operation. Name, data type and shape are used from the NodeArg.
/// </summary>
/// <param name="op">Operation to update.</param>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "core/common/safeint.h"
#include "core/providers/coreml/builders/helper.h"
#include "core/providers/coreml/builders/impl/base_op_builder.h"
#include "core/providers/coreml/builders/impl/builder_utils.h"
#include "core/providers/coreml/builders/model_builder.h"
#include "core/providers/coreml/builders/op_builder_factory.h"
#include "core/providers/coreml/shape_utils.h"
Expand All @@ -18,52 +19,133 @@ class DepthToSpaceOpBuilder : public BaseOpBuilder {

bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
const logging::Logger& logger) const override;

bool SupportsMLProgram() const override { return true; }
};

// Adds the CoreML equivalent of an ONNX DepthToSpace node to the model being built.
//   - ML Program + DCR mode: a single 'depth_to_space' operation.
//   - ML Program + any other mode: reshape -> transpose -> reshape using the static input shape.
//   - NeuralNetwork: a ReorganizeData layer in DEPTH_TO_SPACE mode.
// Returns Status::OK() on success, or an error status if the static input shape cannot be read in the manual
// reshape/transpose path.
// NOTE(review): this span comes from a rendered diff, so lines removed by the change and lines added by it
// appear together (e.g. two signatures endings, two 'blocksize' reads, duplicated layer setup).
Status DepthToSpaceOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
const Node& node,
const logging::Logger& /* logger */) const {
std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);

[[maybe_unused]] const logging::Logger& logger) const {
const auto& input_defs = node.InputDefs();
const auto& output_defs = node.OutputDefs();
const auto& input_name = input_defs[0]->Name();
const auto& output_name = output_defs[0]->Name();

uint64_t blocksize = SafeInt<uint64_t>(node.GetAttributes().at("blocksize").i());
NodeAttrHelper helper(node);
int64_t blocksize = *helper.GetInt64("blocksize"); // required attribute

// The ML Program path is only compiled in when COREML_ENABLE_MLPROGRAM is defined. When it is, the trailing
// 'else' below selects the NeuralNetwork block; when it is not, that block runs unconditionally.
#if defined(COREML_ENABLE_MLPROGRAM)
if (model_builder.CreateMLProgram()) {
using namespace CoreML::Specification::MILSpec; // NOLINT

// 'mode' defaults to "DCR" when the attribute is not present
const auto mode = helper.Get("mode", "DCR");

if (mode == "DCR") {
// DCR is directly supported
// https://apple.github.io/coremltools/source/coremltools.converters.mil.mil.ops.defs.html#coremltools.converters.mil.mil.ops.defs.iOS15.tensor_transformation.depth_to_space
// Validated with depth_to_space.py.
skottmckay marked this conversation as resolved.
Show resolved Hide resolved
auto op = model_builder.CreateOperation(node, "depth_to_space");
AddOperationInput(*op, "x", input_name);
AddOperationInput(*op, "block_size", model_builder.AddScalarConstant(op->type(), "blocksize", blocksize));
AddOperationOutput(*op, *output_defs[0]);
model_builder.AddOperation(std::move(op));
} else {
// CRD is manual. There may be a perf cost from the Reshapes (typically that happens on CPU) but if the input
// is a fixed size hopefully CoreML is smart enough to handle that aspect during model compilation instead
// of execution.

// https://github.com/onnx/onnx/blob/main/docs/Operators.md#depthtospace
// b, c, h, w = x.shape
// tmp = np.reshape(x, [b, c // (blocksize ** 2), blocksize, blocksize, h, w])
// tmp = np.transpose(tmp, [0, 1, 4, 2, 5, 3])
// y = np.reshape(tmp, [b, c // (blocksize ** 2), h * blocksize, w * blocksize])
//
// CoreML has a 5D limit, so we merge the batch dim into the channel dim as that doesn't change the data
// movement.
// First reshape is to [b * c // (blocksize ** 2), blocksize, blocksize, h, w]
// Transpose is to [0, 3, 1, 4, 2]

// we checked shape was static in IsOpSupportedImpl so this should never fail
std::vector<int64_t> input_shape;
ORT_RETURN_IF_NOT(GetStaticShape(*input_defs[0], input_shape, logger), "Failed to get input shape");
// the intermediate outputs created below are always declared with a float element type
const int32_t elem_type = static_cast<int32_t>(ONNX_NAMESPACE::TensorProto_DataType_FLOAT);

// reshape to [b * c // (blocksize ** 2), blocksize, blocksize, h, w]
auto reshape1 = model_builder.CreateOperation(node, "reshape", "pre");
std::vector<int64_t> shape1 = {input_shape[0] * input_shape[1] / (blocksize * blocksize),
blocksize, blocksize, input_shape[2], input_shape[3]};
AddOperationInput(*reshape1, "x", input_name);
AddOperationInput(*reshape1, "shape", model_builder.AddConstant(reshape1->type(), "shape", shape1));
const auto& reshape1_output = model_builder.GetUniqueName(node, "reshape1");
AddIntermediateOperationOutput(*reshape1, reshape1_output, elem_type, shape1);

// transpose to [0, 3, 1, 4, 2]
auto transpose = model_builder.CreateOperation(node, "transpose");
std::vector<int64_t> perm = {0, 3, 1, 4, 2};
// shape2 is shape1 with its dims reordered by 'perm'
std::vector<int64_t> shape2 = {shape1[0], shape1[3], shape1[1], shape1[4], shape1[2]};
AddOperationInput(*transpose, "x", reshape1_output);
AddOperationInput(*transpose, "perm", model_builder.AddConstant(transpose->type(), "perm", perm));
const auto& transpose_output = model_builder.GetUniqueName(node, "transpose");
AddIntermediateOperationOutput(*transpose, transpose_output, elem_type, shape2);

// reshape to [b, c // (blocksize ** 2), h * blocksize, w * blocksize]
auto reshape2 = model_builder.CreateOperation(node, "reshape", "post");
std::vector<int64_t> shape3 = {input_shape[0],
input_shape[1] / (blocksize * blocksize),
input_shape[2] * blocksize,
input_shape[3] * blocksize};
AddOperationInput(*reshape2, "x", transpose_output);
AddOperationInput(*reshape2, "shape", model_builder.AddConstant(reshape2->type(), "shape", shape3));

// the final reshape produces the node's real output, so its type info comes from the NodeArg
AddOperationOutput(*reshape2, *output_defs[0]);

// add the operations in the order the data flows through them
model_builder.AddOperation(std::move(reshape1));
model_builder.AddOperation(std::move(transpose));
model_builder.AddOperation(std::move(reshape2));
}
} else // NOLINT
#endif // if defined(COREML_ENABLE_MLPROGRAM)
{
const auto& output_name = output_defs[0]->Name();
std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);

auto* coreml_depthtospace = layer->mutable_reorganizedata();
coreml_depthtospace->set_blocksize(blocksize);
coreml_depthtospace->set_mode(CoreML::Specification::ReorganizeDataLayerParams_ReorganizationType::
ReorganizeDataLayerParams_ReorganizationType_DEPTH_TO_SPACE);
auto* coreml_depthtospace = layer->mutable_reorganizedata();
coreml_depthtospace->set_blocksize(static_cast<uint64_t>(blocksize));
coreml_depthtospace->set_mode(CoreML::Specification::ReorganizeDataLayerParams_ReorganizationType::
ReorganizeDataLayerParams_ReorganizationType_DEPTH_TO_SPACE);

*layer->mutable_input()->Add() = input_name;
*layer->mutable_output()->Add() = output_name;
*layer->mutable_input()->Add() = input_name;
*layer->mutable_output()->Add() = output_name;

model_builder.AddLayer(std::move(layer));
}

model_builder.AddLayer(std::move(layer));
return Status::OK();
}

bool DepthToSpaceOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& /*input_params*/,
bool DepthToSpaceOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
const logging::Logger& logger) const {
const auto& input_defs = node.InputDefs();

std::vector<int64_t> input_shape;
if (!GetShape(*input_defs[0], input_shape, logger)) {
LOGS(logger, VERBOSE) << "DepthToSpace: no input shape";
return false;
}

const auto input_rank = input_shape.size();
if (input_rank < 4) {
LOGS(logger, VERBOSE) << "DepthToSpace does not support input shape of " << input_rank << "d shape.";
}
// ONNX and CoreML both require 4D input so no need to check the shape here.

NodeAttrHelper helper(node);
if (node.SinceVersion() >= 11) {
// For now, only DCR mode DepthToSpace is supported
const auto mode = helper.Get("mode", "DCR");
const auto mode = helper.Get("mode", "DCR");

if (input_params.create_mlprogram) {
if (mode == "CRD" && !IsStaticShape(input_shape)) {
// we need to manually implement the logic with a Reshape, so we need to know the shape to do that
LOGS(logger, VERBOSE) << "DepthToSpace: CRD mode requires static shape";
return false;
}
} else {
if (mode != "DCR") {
LOGS(logger, VERBOSE) << "The mode: " << mode << "of DepthToSpace is not supported in CoreML EP for now.";
LOGS(logger, VERBOSE) << "DepthToSpace: " << mode << " mode is not supported";
return false;
}
}
Expand Down
31 changes: 31 additions & 0 deletions onnxruntime/test/providers/cpu/tensor/space_depth_ops_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -373,5 +373,36 @@ TEST(TensorOpTest, DepthToSpaceTest_5) {
test.Run();
}

// Checks DepthToSpace in "CRD" mode with a batch size of 2. The second batch is the first batch's data in
// reverse order so that an implementation which mishandles the batch dimension produces a detectably wrong result.
TEST(TensorOpTest, DepthToSpaceTest_CRD_Batched) {
  OpTester test("DepthToSpace", 11);  // create an opset 11 model with attribute present = "CRD" mode
  constexpr int64_t blocksize = 2;
  test.AddAttribute("blocksize", blocksize);
  test.AddAttribute("mode", "CRD");

  constexpr int64_t N = 2, C = 4, H = 2, W = 3;
  std::vector<float> X = {0., 1., 2.,
                          3., 4., 5.,
                          9., 10., 11.,
                          12., 13., 14.,
                          18., 19., 20.,
                          21., 22., 23.,
                          27., 28., 29.,
                          30., 31., 32.};

  // Append same data but in reverse order so we can tell if the batch output is wrong.
  // The reversed copy is built first: passing iterators into X to X.insert is undefined behavior because the
  // insertion may reallocate and invalidate them.
  const std::vector<float> X_reversed(X.rbegin(), X.rend());
  X.insert(X.end(), X_reversed.begin(), X_reversed.end());

  test.AddInput<float>("input", {N, C, H, W}, X);

  std::vector<float> result = {0., 9., 1., 10., 2., 11.,
                               18., 27., 19., 28., 20., 29.,
                               3., 12., 4., 13., 5., 14.,
                               21., 30., 22., 31., 23., 32.};

  // same approach as for the input: reverse into a separate vector before appending
  const std::vector<float> result_reversed(result.rbegin(), result.rend());
  result.insert(result.end(), result_reversed.begin(), result_reversed.end());

  // output shape is {N, C / (blocksize * blocksize), H * blocksize, W * blocksize}
  test.AddOutput<float>("output", {2, 1, 4, 6}, result);
  test.Run();
}

} // namespace test
} // namespace onnxruntime
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ Keep in sync with doco generated from /docs/execution-providers/CoreML-Execution
|ai.onnx:AveragePool|Only 2D Pool is supported currently. 3D and 5D support can be added if needed.|
|ai.onnx:Clip||
|ai.onnx:Conv|Only 1D/2D Conv is supported.<br/>Bias if provided must be constant.|
|ai.onnx:DepthToSpace|If 'mode' is 'CRD' the input must have a fixed shape.|
|ai.onnx:Div||
|ai.onnx:Gemm|Input B must be constant.|
|ai.onnx:GlobalAveragePool|Only 2D Pool is supported currently. 3D and 5D support can be added if needed.|
Expand Down
Loading