diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index 38ed0b1640192..0c1e5e93c6844 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -679,7 +679,10 @@ if(onnxruntime_USE_RKNPU) endif() if(onnxruntime_USE_COREML) - list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/coreml/*) + list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/coreml/*.cc) + if(APPLE) + list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/coreml/*.mm) + endif() list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_coreml coreml_proto) list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_coreml coreml_proto) list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_coreml coreml_proto) diff --git a/onnxruntime/core/providers/coreml/builders/impl/convtranspose_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/convtranspose_op_builder.cc new file mode 100644 index 0000000000000..5b6d9d72ab3c9 --- /dev/null +++ b/onnxruntime/core/providers/coreml/builders/impl/convtranspose_op_builder.cc @@ -0,0 +1,218 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "core/providers/common.h" +#include "core/providers/coreml/builders/helper.h" +#include "core/providers/coreml/builders/impl/base_op_builder.h" +#include "core/providers/coreml/builders/impl/builder_utils.h" +#include "core/providers/coreml/builders/model_builder.h" +#include "core/providers/coreml/builders/op_builder_factory.h" +#include "core/providers/coreml/shape_utils.h" +#include "core/providers/shared/utils/utils.h" + +using namespace CoreML::Specification; + +namespace onnxruntime { +namespace coreml { + +class ConvTransposeOpBuilder : public BaseOpBuilder { + Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, + const logging::Logger& logger) const override; + + bool IsOpSupportedImpl(const Node& /* node */, const OpBuilderInputParams& /* input_params */, + const logging::Logger& /* logger */) const override; + + bool SupportsMLProgram() const override { return true; } +}; + +Status ConvTransposeOpBuilder::AddToModelBuilderImpl([[maybe_unused]] ModelBuilder& model_builder, + [[maybe_unused]] const Node& node, + const logging::Logger& /*logger*/) const { +#if defined(COREML_ENABLE_MLPROGRAM) + using namespace CoreML::Specification::MILSpec; // NOLINT + const auto input_defs = node.InputDefs(); + const auto output_defs = node.OutputDefs(); + const auto& input_name = input_defs[0]->Name(); + + NodeAttrHelper helper(node); + + // https://apple.github.io/coremltools/source/coremltools.converters.mil.mil.ops.defs.html#coremltools.converters.mil.mil.ops.defs.iOS15.conv.conv_transpose + std::unique_ptr op = model_builder.CreateOperation(node, "conv_transpose"); + const auto& op_type = op->type(); + + AddOperationInput(*op, "x", input_name); + AddOperationInput(*op, "weight", input_defs[1]->Name()); + + if (input_defs.size() > 2) { + AddOperationInput(*op, "bias", input_defs[2]->Name()); + } + + // we know this input has a valid shape due to the check in IsOpSupportedImpl. ignore N and C dims. + const auto num_spatial_dims = input_defs[1]->Shape()->dim_size() - 2; + + // Spec says strides/dilations/pads are optional but reality is they're required for at least the iOS15 target + // which is CoreML5. Due to that we just add everything for simplicity. 
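+  // e.g. for a 2D ConvTranspose (num_spatial_dims == 2) with no explicit 'strides' or 'dilations'
+  // attributes, the defaults below resolve to {1, 1} for each.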
+ const auto strides = helper.Get("strides", std::vector(num_spatial_dims, 1)); + const auto dilations = helper.Get("dilations", std::vector(num_spatial_dims, 1)); + + AddOperationInput(*op, "strides", model_builder.AddConstant(op_type, "strides", strides)); + AddOperationInput(*op, "dilations", model_builder.AddConstant(op_type, "dilations", dilations)); + + const std::optional groups = helper.GetInt64("group"); + if (groups) { + AddOperationInput(*op, "groups", model_builder.AddScalarConstant(op_type, "groups", *groups)); + } + + // if we can enable output_shape, this code works. see IsOpSupportedImpl for the reason it's disabled. + // const auto output_shape = helper.GetInt64s("output_shape"); + // if (output_shape) { + // AddOperationInput(*op, "output_shape", model_builder.AddConstant(op_type, "output_shape", *output_shape)); + // // these are required despite the spec saying otherwise + // AddOperationInput(*op, "pad_type", model_builder.AddScalarConstant(op_type, "pad_type", std::string("valid"))); + // std::vector pads(num_spatial_dims * 2, 0); + // AddOperationInput(*op, "pad", model_builder.AddConstant(op_type, "pad", pads)); + //} else { + // AddPadTypeAndPads(*op, model_builder, op_type, helper, num_spatial_dims); + //} + + AddPadTypeAndPads(*op, model_builder, op_type, helper, num_spatial_dims); + + AddOperationOutput(*op, *output_defs[0]); + + model_builder.AddOperation(std::move(op)); +#endif // defined(COREML_ENABLE_MLPROGRAM) + + return Status::OK(); +} + +bool ConvTransposeOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, + const logging::Logger& logger) const { + if (!input_params.create_mlprogram) { + LOGS(logger, VERBOSE) << "ConvTranspose: ML Program required"; + return false; + } + + // ML Program + // - const weight until CoreML7 (iOS17) + // - require constant for now as non-const would be unusual and we rely on the shape of W to be known to validate + // the kernel_shape can be used + // - const bias + // - const pad + // - if auto_pad is same_upper or same_lower the output[i] - (input[i] * strides[i]) must be divisible by 2 + // as the pads must be equally split as there's no upper/lower option in CoreML + // - punting on supporting this for now + // - must be symmetric for CoreML to do the right thing + // - const strides/dilations/groups + // - output_shape CoreML output is inconsistent so disabled for now + // + // NOTE: need to test with/without the COREML_FLAG_USE_CPU_ONLY flag being set to get an idea of how flaky the CoreML + // behavior is. 
+ // Update /onnxruntime/test/util/default_providers.cc:DefaultCoreMLExecutionProvider to do so + + const auto& input_defs = node.InputDefs(); + + std::vector input_shape; + if (!GetShape(*input_defs[0], input_shape, logger)) { + // requires the rank at least to be known + LOGS(logger, VERBOSE) << "ConvTranspose: failed to get input shape"; + return false; + } + + // for simplicity require weight to be constant + const auto& weight_arg = *input_defs[1]; + const auto& weight_name = input_defs[1]->Name(); + const auto* weight = input_params.graph_viewer.GetConstantInitializer(weight_name); + if (!weight) { + LOGS(logger, VERBOSE) << "ConvTranspose: weight must be constant"; + return false; + } + + if (input_defs.size() > 2 && !input_params.graph_viewer.GetConstantInitializer(input_defs[2]->Name())) { + LOGS(logger, VERBOSE) << "ConvTranspose: bias must be constant"; + return false; + } + + std::vector weight_shape; + if (!GetShape(weight_arg, weight_shape, logger)) { + // impossible as it's a constant initializer + LOGS(logger, VERBOSE) << "ConvTranspose: failed to get weight shape"; + return false; + } + + int64_t num_spatial_dims = narrow(weight_shape.size()) - 2; + + NodeAttrHelper helper(node); + + // Punt on SAME_UPPER/SAME_LOWER for now. + // We could infer that 'same' -> 'same_upper' based on the CoreML conv spec having 'same' and 'same_lower' but + // need to validate that assertion. + // Additionally, if the pads size is equal, there's no difference between same_upper and same_lower. + // To do that we'd need the 'output_shape' attribute to check against. + // Can add this handling if/when needed. + auto autopad = StringToAutoPadType(helper.Get("auto_pad", "NOTSET")); + if (autopad == AutoPadType::SAME_LOWER || autopad == AutoPadType::SAME_UPPER) { + LOGS(logger, VERBOSE) << "ConvTranspose: support for SAME_LOWER/SAME_UPPER is not implemented yet"; + return false; + } else if (autopad == AutoPadType::NOTSET) { + // CoreML output is inconsistent between CPU_ONLY and ALL if the pads aren't all the same value. + // CPU matches the expected output, but other devices don't seem to (at least on macOS). + auto onnx_pads = *helper.GetInt64s("pads"); // 'pads' are required if auto_pad is NOTSET + const auto pad_value = onnx_pads[0]; + if (!std::all_of(onnx_pads.begin() + 1, onnx_pads.end(), + [pad_value](auto value) { return value == pad_value; })) { + LOGS(logger, VERBOSE) << "ConvTranspose: all pad values must be the same for CoreML to return " + "consistent results"; + return false; + } + } + + // there's no input to specify a kernel shape in CoreML. + // it's OK if a specified kernel_shape matches kH and kW dims of the weight input. + auto kernel_shape = helper.GetInt64s("kernel_shape"); + if (kernel_shape) { + bool valid = true; + + if (static_cast(kernel_shape->size()) == num_spatial_dims) { + for (int i = 0; i < num_spatial_dims; ++i) { + // check the specified kernel shape matches the weight shape. skip the initial N and C dims in the latter. + if ((*kernel_shape)[i] != weight_shape[i + 2]) { + valid = false; + break; + } + } + } else { + valid = false; + } + + if (!valid) { + LOGS(logger, VERBOSE) << "ConvTranspose: kernel_shape attribute does not match the weight shape"; + return false; + } + } + + // In theory this can be supported, but running with COREML_FLAG_USE_CPU_ONLY produces output that doesn't match + // ONNX. Running without that flag produces the expected output. Madness... 
+ auto output_shape = helper.GetInt64s("output_shape"); + if (output_shape) { + LOGS(logger, VERBOSE) << "ConvTranspose: output_shape is not supported as the CoreML output is inconsistent"; + return false; + } + + // output_padding, if specified, must be the default value of all zeros as there's no equivalent in CoreML. + auto output_padding = helper.GetInt64s("output_padding"); + if (output_padding && + std::any_of(output_padding->begin(), output_padding->end(), [](auto value) { return value != 0; })) { + LOGS(logger, VERBOSE) << "ConvTranspose: output_padding is not supported"; + return false; + } + + return true; +} + +void CreateConvTransposeOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) { + op_registrations.builders.push_back(std::make_unique()); + op_registrations.op_builder_map.emplace(op_type, op_registrations.builders.back().get()); +} + +} // namespace coreml +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/coreml/builders/impl/resize_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/resize_op_builder.cc index 65b5c17f2c6a6..7ff66e4a79e37 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/resize_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/resize_op_builder.cc @@ -427,13 +427,13 @@ bool ResizeOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputPa auto h_in = input_shape[input_rank - 2]; auto w_in = input_shape[input_rank - 1]; - if (!utils::IsScalingByAFactorOfN(h_in, scale_h)) { + if (!utils::ReciprocalIsAFactorOfN(h_in, scale_h)) { LOGS(logger, VERBOSE) << "Resize: downsampling scale " << scale_h << " is not a factor of input height: " << h_in; return false; } - if (!utils::IsScalingByAFactorOfN(w_in, scale_w)) { + if (!utils::ReciprocalIsAFactorOfN(w_in, scale_w)) { LOGS(logger, VERBOSE) << "Resize: downsampling scale " << scale_w << " is not a factor of input width: " << w_in; return false; diff --git a/onnxruntime/core/providers/coreml/builders/op_builder_factory.cc b/onnxruntime/core/providers/coreml/builders/op_builder_factory.cc index b17827f8e0532..535712f096010 100644 --- a/onnxruntime/core/providers/coreml/builders/op_builder_factory.cc +++ b/onnxruntime/core/providers/coreml/builders/op_builder_factory.cc @@ -15,120 +15,56 @@ namespace coreml { static OpBuilderRegistrations CreateOpBuilderRegistrations() { OpBuilderRegistrations op_registrations; - { // Add/Mul/Pow/Sub/Div - CreateBinaryOpBuilder("Add", op_registrations); - CreateBinaryOpBuilder("Mul", op_registrations); - CreateBinaryOpBuilder("Pow", op_registrations); - CreateBinaryOpBuilder("Sub", op_registrations); - CreateBinaryOpBuilder("Div", op_registrations); - } - - { // Activations - CreateActivationOpBuilder("Sigmoid", op_registrations); - CreateActivationOpBuilder("Tanh", op_registrations); - CreateActivationOpBuilder("Relu", op_registrations); - CreateActivationOpBuilder("PRelu", op_registrations); - CreateActivationOpBuilder("LeakyRelu", op_registrations); - } - - { // Transpose - CreateTransposeOpBuilder("Transpose", op_registrations); - } - - { // Conv - CreateConvOpBuilder("Conv", op_registrations); - } - - { // Batch Normalization - CreateBatchNormalizationOpBuilder("BatchNormalization", op_registrations); - } - - { // Reshape - CreateReshapeOpBuilder("Reshape", op_registrations); - } - - { // DepthToSpace - CreateDepthToSpaceOpBuilder("DepthToSpace", op_registrations); - } - - { // Pool - CreatePoolOpBuilder("GlobalAveragePool", op_registrations); - CreatePoolOpBuilder("GlobalMaxPool", 
op_registrations); - CreatePoolOpBuilder("AveragePool", op_registrations); - CreatePoolOpBuilder("MaxPool", op_registrations); - } - - { // Concat - CreateConcatOpBuilder("Concat", op_registrations); - } - - { // Resize - CreateResizeOpBuilder("Resize", op_registrations); - } - - { // Gemm/MatMul - CreateGemmOpBuilder("Gemm", op_registrations); - CreateGemmOpBuilder("MatMul", op_registrations); - } - - { // Clip - CreateClipOpBuilder("Clip", op_registrations); - } - - { // Squeeze - CreateSqueezeOpBuilder("Squeeze", op_registrations); - } - - { // ArgMax - CreateArgMaxOpBuilder("ArgMax", op_registrations); - } - - { // Cast - CreateCastOpBuilder("Cast", op_registrations); - } - - { // Flatten - CreateFlattenOpBuilder("Flatten", op_registrations); - } - - { // LRN - CreateLRNOpBuilder("LRN", op_registrations); - } - - { // Pad - CreatePadOpBuilder("Pad", op_registrations); - } - - { // Unary - CreateUnaryOpBuilder("Sqrt", op_registrations); - CreateUnaryOpBuilder("Reciprocal", op_registrations); - } - - { // Reduction - // ReduceMean is used in layer normalization which seems to be problematic in Python tests. - CreateReductionOpBuilder("ReduceMean", op_registrations); - CreateReductionOpBuilder("ReduceSum", op_registrations); - } - - { // Shape - CreateShapeOpBuilder("Shape", op_registrations); - } - - { // Gather - CreateGatherOpBuilder("Gather", op_registrations); - } - - { // Slice - CreateSliceOpBuilder("Slice", op_registrations); - } - - { // Softmax - CreateSoftmaxOpBuilder("Softmax", op_registrations); - } - - { // Split - CreateSplitOpBuilder("Split", op_registrations); - } + // Unary ops + CreateUnaryOpBuilder("Sqrt", op_registrations); + CreateUnaryOpBuilder("Reciprocal", op_registrations); + + // Binary elementwise ops + CreateBinaryOpBuilder("Add", op_registrations); + CreateBinaryOpBuilder("Mul", op_registrations); + CreateBinaryOpBuilder("Pow", op_registrations); + CreateBinaryOpBuilder("Sub", op_registrations); + CreateBinaryOpBuilder("Div", op_registrations); + + // Activations + CreateActivationOpBuilder("Sigmoid", op_registrations); + CreateActivationOpBuilder("Tanh", op_registrations); + CreateActivationOpBuilder("Relu", op_registrations); + CreateActivationOpBuilder("PRelu", op_registrations); + CreateActivationOpBuilder("LeakyRelu", op_registrations); + + // Pooling ops + CreatePoolOpBuilder("GlobalAveragePool", op_registrations); + CreatePoolOpBuilder("GlobalMaxPool", op_registrations); + CreatePoolOpBuilder("AveragePool", op_registrations); + CreatePoolOpBuilder("MaxPool", op_registrations); + + // Reduction ops + CreateReductionOpBuilder("ReduceMean", op_registrations); + CreateReductionOpBuilder("ReduceSum", op_registrations); + + CreateArgMaxOpBuilder("ArgMax", op_registrations); + CreateBatchNormalizationOpBuilder("BatchNormalization", op_registrations); + CreateCastOpBuilder("Cast", op_registrations); + CreateClipOpBuilder("Clip", op_registrations); + CreateConcatOpBuilder("Concat", op_registrations); + CreateConvOpBuilder("Conv", op_registrations); + CreateConvTransposeOpBuilder("ConvTranspose", op_registrations); + CreateDepthToSpaceOpBuilder("DepthToSpace", op_registrations); + CreateFlattenOpBuilder("Flatten", op_registrations); + CreateGatherOpBuilder("Gather", op_registrations); + CreateGemmOpBuilder("Gemm", op_registrations); + CreateLRNOpBuilder("LRN", op_registrations); + CreateGemmOpBuilder("MatMul", op_registrations); + CreatePadOpBuilder("Pad", op_registrations); + CreateReshapeOpBuilder("Reshape", op_registrations); + CreateResizeOpBuilder("Resize", 
op_registrations); + CreateShapeOpBuilder("Shape", op_registrations); + CreateSliceOpBuilder("Slice", op_registrations); + CreateSplitOpBuilder("Split", op_registrations); + CreateSoftmaxOpBuilder("Softmax", op_registrations); + CreateSqueezeOpBuilder("Squeeze", op_registrations); + CreateTransposeOpBuilder("Transpose", op_registrations); CreateGridSampleOpBuilder("GridSample", op_registrations); diff --git a/onnxruntime/core/providers/coreml/builders/op_builder_factory.h b/onnxruntime/core/providers/coreml/builders/op_builder_factory.h index a9a8ab90b0863..1990fb6400ce1 100644 --- a/onnxruntime/core/providers/coreml/builders/op_builder_factory.h +++ b/onnxruntime/core/providers/coreml/builders/op_builder_factory.h @@ -24,6 +24,7 @@ void CreateCastOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_ void CreateClipOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); void CreateConcatOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); void CreateConvOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); +void CreateConvTransposeOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); void CreateDepthToSpaceOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); void CreateFlattenOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); void CreateGatherOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); diff --git a/onnxruntime/core/providers/coreml/model/model.h b/onnxruntime/core/providers/coreml/model/model.h index c4c3b38bba516..75b9aaf2185c9 100644 --- a/onnxruntime/core/providers/coreml/model/model.h +++ b/onnxruntime/core/providers/coreml/model/model.h @@ -13,6 +13,10 @@ #include "core/common/status.h" #include "core/platform/ort_mutex.h" +#if defined(__OBJC__) +@class MLMultiArray; +#endif + namespace onnxruntime { namespace coreml { @@ -32,6 +36,15 @@ using GetOutputTensorMutableRawDataFn = std::function static_shape)>; +#if defined(__OBJC__) +// helper function that we unit test. +// Handles an MLMultiArray that is contiguous, or has one non-contiguous dimension. +// The output values can be used to copy the array data to a contiguous buffer. +// Loop num_blocks times, copying block_size elements each time, moving stride elements between copies. +// A contiguous array will have num_blocks == 1, block_size == total_size (i.e. 
can be copied in a single operation) +Status GetMLMultiArrayCopyInfo(const MLMultiArray* array, int64_t& num_blocks, int64_t& block_size, int64_t& stride); +#endif + class Model { public: Model(const std::string& path, diff --git a/onnxruntime/core/providers/coreml/model/model.mm b/onnxruntime/core/providers/coreml/model/model.mm index 1d506099b4367..4fd822f0d0d15 100644 --- a/onnxruntime/core/providers/coreml/model/model.mm +++ b/onnxruntime/core/providers/coreml/model/model.mm @@ -174,51 +174,69 @@ Status CreateInputFeatureProvider(const std::unordered_map mlmultiarray_buffer_size) { + const MLMultiArray* array, + const int64_t num_blocks, const int64_t block_size, const int64_t stride, + const OnnxTensorInfo* tensor_info) { if (mlmultiarray_buffer == nullptr) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "mlmultiarray_buffer has no data"); } - const size_t num_elements = array_info.count; + // total including non-contiguous space + + int64_t array_total_elements = [array.strides[0] longLongValue] * [array.shape[0] longLongValue]; + const int64_t num_elements = array.count; + + ORT_RETURN_IF(array_total_elements != num_blocks * stride || + num_elements != num_blocks * block_size, + "MLMultiArray size does not match the copy info"); + const auto onnx_data_type = tensor_info->data_type; switch (onnx_data_type) { case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: { - const auto output_data_byte_size = num_elements * sizeof(float); - ORT_RETURN_IF_NOT(!mlmultiarray_buffer_size || mlmultiarray_buffer_size == output_data_byte_size, - "CoreML output buffer size and expected output size differ"); - memcpy(tensor_buffer, mlmultiarray_buffer, output_data_byte_size); + const auto* src_buffer = static_cast(mlmultiarray_buffer); + auto* dst_buffer = static_cast(tensor_buffer); + const auto block_byte_size = block_size * sizeof(float); + + for (int64_t idx = 0; idx < num_blocks; ++idx) { + memcpy(dst_buffer, src_buffer, block_byte_size); + src_buffer += stride; + dst_buffer += block_size; + } break; } case ONNX_NAMESPACE::TensorProto_DataType_INT32: { - const auto output_data_byte_size = num_elements * sizeof(int32_t); - ORT_RETURN_IF_NOT(!mlmultiarray_buffer_size || mlmultiarray_buffer_size == output_data_byte_size, - "CoreML output buffer size and expected output size differ"); - memcpy(tensor_buffer, mlmultiarray_buffer, output_data_byte_size); + const auto* src_buffer = static_cast(mlmultiarray_buffer); + auto* dst_buffer = static_cast(tensor_buffer); + const auto block_byte_size = block_size * sizeof(int32_t); + + for (int64_t idx = 0; idx < num_blocks; ++idx) { + memcpy(dst_buffer, src_buffer, block_byte_size); + src_buffer += stride; + dst_buffer += block_size; + } + break; } // For this case, since Coreml Spec only uses int32 for model output while onnx provides // int64 for model output data type. 
We are doing a type casting (int32 -> int64) here // when copying the model to ORT case ONNX_NAMESPACE::TensorProto_DataType_INT64: { - ORT_RETURN_IF_NOT(array_info.dataType == MLMultiArrayDataTypeInt32, - "CoreML output data type is not MLMultiArrayDataTypeInt32"); - ORT_RETURN_IF_NOT(!mlmultiarray_buffer_size || mlmultiarray_buffer_size == num_elements * sizeof(int32_t), - "CoreML output buffer size and expected output size differ"); - const auto model_output_span = gsl::span{static_cast(mlmultiarray_buffer), num_elements}; - const auto output_span = gsl::span{static_cast(tensor_buffer), num_elements}; - std::transform(model_output_span.begin(), model_output_span.end(), output_span.begin(), - [](int32_t v) { return static_cast(v); }); + ORT_RETURN_IF(array.dataType != MLMultiArrayDataTypeInt32, + "CoreML output data type is not MLMultiArrayDataTypeInt32"); + + const int32_t* src_buffer = static_cast(mlmultiarray_buffer); + int64_t* dst_buffer = static_cast(tensor_buffer); + + for (int64_t idx = 0; idx < num_blocks; ++idx) { + auto input_span = gsl::span{src_buffer, static_cast(block_size)}; + auto output_span = gsl::span{dst_buffer, static_cast(block_size)}; + std::transform(input_span.begin(), input_span.end(), output_span.begin(), + [](int32_t v) { return static_cast(v); }); + + src_buffer += stride; + dst_buffer += block_size; + } break; } default: @@ -250,8 +268,7 @@ - (void)dealloc; - (Status)loadModel API_AVAILABLE_COREML3; - (Status)predict:(const std::unordered_map&)inputs outputs:(const std::unordered_map&)outputs - getOutputTensorDataFn:(const GetOutputTensorMutableRawDataFn&) - get_output_tensor_mutable_raw_data_fn + getOutputTensorDataFn:(const GetOutputTensorMutableRawDataFn&)get_output_tensor_mutable_raw_data_fn API_AVAILABLE_COREML3; @property(nullable) MLModel* model API_AVAILABLE_COREML3; @@ -397,21 +414,27 @@ - (Status)predict:(const std::unordered_map&)inputs ") do not match"); } - ORT_RETURN_IF_NOT(IsArrayContiguous(data), - "Non-contiguous output MLMultiArray is not currently supported"); + // support a non-contiguous array, provided only one dimension is not contiguous + int64_t num_blocks = 0; + int64_t block_size = 0; + int64_t stride = 0; + + ORT_RETURN_IF_ERROR(GetMLMultiArrayCopyInfo(data, num_blocks, block_size, stride)); + __block Status copy_status; const auto* tensor_info = &output_tensor_info; // `getBytesWithHandler` replaces deprecated `.dataPointer` on new versions if (@available(macOS 12.3, iOS 15.4, *)) { [data getBytesWithHandler:^(const void* bytes, NSInteger size) { - copy_status = CopyMLMultiArrayBuffer(bytes, output_buffer, data, tensor_info, size); + copy_status = CopyMLMultiArrayBuffer(bytes, output_buffer, data, + num_blocks, block_size, stride, tensor_info); }]; } else { - // disable size check as old API does not return buffer length - copy_status = CopyMLMultiArrayBuffer(data.dataPointer, output_buffer, data, tensor_info, std::nullopt); + copy_status = CopyMLMultiArrayBuffer(data.dataPointer, output_buffer, data, + num_blocks, block_size, stride, tensor_info); } - if (!copy_status.IsOK()) - return copy_status; + + ORT_RETURN_IF_ERROR(copy_status); } } } @@ -431,6 +454,49 @@ - (Status)predict:(const std::unordered_map&)inputs namespace onnxruntime { namespace coreml { +Status GetMLMultiArrayCopyInfo(const MLMultiArray* _Nonnull array, + int64_t& num_blocks, int64_t& block_size, int64_t& stride) { + const auto* shape = array.shape; + const auto rank = shape.count; + + int64_t array_total_elements = [array.strides[0] longLongValue] * [shape[0] 
longLongValue]; + + int64_t data_elems = 1; // actual values + int64_t total_elems = 1; // elems including empty slots if non-contiguous + for (unsigned long i = 1; i <= rank; i++) { + int64_t this_stride = [array.strides[rank - i] longLongValue]; + if (this_stride != total_elems) { + // non-contiguous + if (block_size != 0) { + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, + "Multiple non-contiguous dimensions in MLMultiArray are not supported."); + } + + block_size = data_elems; + stride = this_stride; + } + + const auto elems_this_dim = [shape[rank - i] longLongValue]; + data_elems *= elems_this_dim; + total_elems = elems_this_dim * this_stride; + } + + if (block_size == 0) { + // all data is contiguous + block_size = data_elems; + stride = array_total_elements; + assert(block_size == stride); + } + + num_blocks = data_elems / block_size; + + ORT_ENFORCE(array_total_elements == total_elems, "Logic error calculating copy info"); + ORT_ENFORCE(stride >= block_size, "Logic error calculating copy info"); + ORT_ENFORCE(stride * num_blocks == total_elems, "Logic error calculating copy info"); + + return Status::OK(); +} + // Internal Execution class // This class will bridge Model (c++) with CoreMLExecution (objective c++) class Execution { diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/resize_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/resize_op_builder.cc index ef27f6c942f44..44403010c936c 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/resize_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/resize_op_builder.cc @@ -274,8 +274,8 @@ bool ResizeOpBuilder::IsOpSupportedImpl(const GraphViewer& graph_viewer, const N return false; } - if (!utils::IsScalingByAFactorOfN(h_in, scale_h) || - !utils::IsScalingByAFactorOfN(w_in, scale_w)) { + if (!utils::ReciprocalIsAFactorOfN(h_in, scale_h) || + !utils::ReciprocalIsAFactorOfN(w_in, scale_w)) { LOGS_DEFAULT(VERBOSE) << "Input size must be evenly divisible by output size when downsampling"; return false; } diff --git a/onnxruntime/core/providers/utils.cc b/onnxruntime/core/providers/utils.cc index 747b09e42aa21..2725af95e0959 100644 --- a/onnxruntime/core/providers/utils.cc +++ b/onnxruntime/core/providers/utils.cc @@ -24,7 +24,7 @@ common::Status OutputOptionalWithoutDataHelper(const ONNX_NAMESPACE::TypeProto& } #endif -bool IsScalingByAFactorOfN(int64_t n, float scale) { +bool ReciprocalIsAFactorOfN(int64_t n, float scale) { bool is_factor = false; if (scale > 0.f && scale < 1.f) { const double factor = 1.0 / scale; diff --git a/onnxruntime/core/providers/utils.h b/onnxruntime/core/providers/utils.h index 9ea8496a02f85..cfd71d9b838b3 100644 --- a/onnxruntime/core/providers/utils.h +++ b/onnxruntime/core/providers/utils.h @@ -19,6 +19,6 @@ common::Status OutputOptionalWithoutDataHelper(const ONNX_NAMESPACE::TypeProto& /// Check if the reciprocal of 'scale' is a factor of 'n'. /// e.g. a scale of 0.5 is 1/2, the reciprocal is 2, and 2 is a factor of any even number. 
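/// Conversely, a scale of 0.3 has a reciprocal of ~3.33, which is not an integer, so it cannot be a factor of any 'n'.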
/// -bool IsScalingByAFactorOfN(int64_t n, float scale); +bool ReciprocalIsAFactorOfN(int64_t n, float scale); } // namespace utils } // namespace onnxruntime diff --git a/onnxruntime/core/providers/xnnpack/nn/conv_transpose.cc b/onnxruntime/core/providers/xnnpack/nn/conv_transpose.cc index c136385f12476..01c8119fea79d 100644 --- a/onnxruntime/core/providers/xnnpack/nn/conv_transpose.cc +++ b/onnxruntime/core/providers/xnnpack/nn/conv_transpose.cc @@ -24,7 +24,7 @@ Status ConvTranspose::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr const auto rank = orig_shape.NumDimensions(); if (conv_transpose_attrs_.group > 1) { - // Xnnpack [G, Oc, H, W Ic/G] + // Xnnpack [G, Oc, H, W, Ic/G] // (ref: https://github.com/google/XNNPACK/blob/ecd8311c8fd3d9ab47edbc3df5f2b5de7dabe75f/test/deconvolution-operator-tester.h#L678) if (rank == 4) { // split C (dim 0) into {group, C/group} diff --git a/onnxruntime/core/providers/xnnpack/tensor/resize.cc b/onnxruntime/core/providers/xnnpack/tensor/resize.cc index c752b5f849808..cf874796ba169 100644 --- a/onnxruntime/core/providers/xnnpack/tensor/resize.cc +++ b/onnxruntime/core/providers/xnnpack/tensor/resize.cc @@ -85,8 +85,8 @@ bool Resize::IsOnnxNodeSupported(const NodeUnit& node_unit, float scale_h = scales[2]; float scale_w = scales[3]; - if (!utils::IsScalingByAFactorOfN(h_in, scale_h) || - !utils::IsScalingByAFactorOfN(w_in, scale_w)) { + if (!utils::ReciprocalIsAFactorOfN(h_in, scale_h) || + !utils::ReciprocalIsAFactorOfN(w_in, scale_w)) { break; } } diff --git a/onnxruntime/test/providers/coreml/utils_test.mm b/onnxruntime/test/providers/coreml/utils_test.mm new file mode 100644 index 0000000000000..f55f108494e3e --- /dev/null +++ b/onnxruntime/test/providers/coreml/utils_test.mm @@ -0,0 +1,108 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#import + +#include "gtest/gtest.h" +#include "gmock/gmock.h" + +#include "core/providers/coreml/model/model.h" +#include "test/util/include/asserts.h" + +namespace onnxruntime { +namespace test { +namespace { +auto ValidateGetInfo(MLMultiArray* array, + int64_t expected_num_blocks, int64_t expected_block_size, int64_t expected_stride, + bool expect_valid) { + int64_t num_blocks = 0; + int64_t block_size = 0; + int64_t stride = 0; + auto status = coreml::GetMLMultiArrayCopyInfo(array, num_blocks, block_size, stride); + + if (!expect_valid) { + ASSERT_STATUS_NOT_OK(status); + return; + } + + ASSERT_STATUS_OK(status); + ASSERT_EQ(num_blocks, expected_num_blocks); + ASSERT_EQ(block_size, expected_block_size); + ASSERT_EQ(stride, expected_stride); +} +} // namespace + +TEST(CoreMLUtils, GetMLMultiArrayReadInfo) { + // fake pointer. we don't read any data but initWithDataPointer requires a non-null address + void* data = reinterpret_cast(0xfeedf00d); + + // a dim is non-contiguous if the stride is > the total number of elements in its inner dimensions + + // dim -1 with non-contiguous data. 1 element (as it's the inner-most dimension) but the stride is 2. + { + NSArray* shape = @[ @1, @1, @8, @8 ]; + NSArray* strides = @[ @128, @128, @16, @2 ]; + + auto* array = [[MLMultiArray alloc] initWithDataPointer:data + shape:shape + dataType:MLMultiArrayDataTypeInt32 + strides:strides + deallocator:^(void* /* bytes */) { + } + error:nil]; + ValidateGetInfo(array, 64, 1, 2, true); + } + + // dim -2 with non-contiguous data. 8 elements in the inner dimension but the stride is 16. 
+ { + NSArray* shape = @[ @1, @1, @8, @8 ]; + NSArray* strides = @[ @128, @128, @16, @1 ]; + + auto* array = [[MLMultiArray alloc] initWithDataPointer:data + shape:shape + dataType:MLMultiArrayDataTypeInt32 + strides:strides + deallocator:^(void* /* bytes */) { + } + error:nil]; + ValidateGetInfo(array, 8, 8, 16, true); + } + + // dim -3 with non-contiguous data. 16 elements in the inner dimensions but stride is 24. + { + NSArray* shape = @[ @1, @2, @4, @4 ]; + NSArray* strides = @[ @48, @24, @4, @1 ]; + + auto* array = [[MLMultiArray alloc] initWithDataPointer:data + shape:shape + dataType:MLMultiArrayDataTypeInt32 + strides:strides + deallocator:^(void* /* bytes */) { + } + error:nil]; + + ValidateGetInfo(array, 2, 16, 24, true); + } + + // two non-contiguous dims (dim -2 and dim -3) + // dim -2 has 4 elements in the inner dimension and stride of 8 + // dim -3 has 32 elements in the inner dimensions (we need to include the empty elements from the non-contiguous data + // in dim -2) and stride of 48 + { + // dim + NSArray* shape = @[ @1, @2, @4, @4 ]; + NSArray* strides = @[ @96, @48, @8, @1 ]; + + auto* array = [[MLMultiArray alloc] initWithDataPointer:data + shape:shape + dataType:MLMultiArrayDataTypeInt32 + strides:strides + deallocator:^(void* /* bytes */) { + } + error:nil]; + + ValidateGetInfo(array, 0, 0, 0, false); + } +} +} // namespace test +} // namespace onnxruntime diff --git a/onnxruntime/test/providers/cpu/nn/conv_transpose_op_test.cc b/onnxruntime/test/providers/cpu/nn/conv_transpose_op_test.cc index 81191e9b48c3c..2bf53ce5b5986 100644 --- a/onnxruntime/test/providers/cpu/nn/conv_transpose_op_test.cc +++ b/onnxruntime/test/providers/cpu/nn/conv_transpose_op_test.cc @@ -27,7 +27,7 @@ void TestConvTransposeOpInitializer(const ConvTransposeOpAttributes& attributes, const vector>& input_shapes, const std::initializer_list& expected_output, const vector& expected_output_shape, - bool is_filter_initializer = false, + bool is_weight_and_bias_initializer = false, OpTester::ExpectResult expect_result = OpTester::ExpectResult::kExpectSuccess, const std::string& err_str = "", const std::unordered_set& excluded_provider_types = {kTensorrtExecutionProvider}) { @@ -58,10 +58,10 @@ void TestConvTransposeOpInitializer(const ConvTransposeOpAttributes& attributes, } ORT_ENFORCE(inputs.size() <= 3, "Our name array is only setup to handle 3 inputs"); - const char* szNames[] = {"X", "W", "B"}; - bool isInitializers[] = {false, is_filter_initializer, false}; + const char* input_names[] = {"X", "W", "B"}; + bool is_initializers[] = {false, is_weight_and_bias_initializer, is_weight_and_bias_initializer}; for (size_t i = 0; i < inputs.size(); i++) { - test.AddInput(szNames[i], input_shapes[i], inputs[i], isInitializers[i]); + test.AddInput(input_names[i], input_shapes[i], inputs[i], is_initializers[i]); } test.AddOutput("Y", expected_output_shape, expected_output); diff --git a/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md b/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md index b65b0f64686a9..5609033fc3e35 100644 --- a/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md +++ b/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md @@ -7,6 +7,7 @@ Keep in sync with doco generated from /docs/execution-providers/CoreML-Execution |ai.onnx:AveragePool|Only 2D Pool is supported currently. 3D and 5D support can be added if needed.| |ai.onnx:Clip|| |ai.onnx:Conv|Only 1D/2D Conv is supported.
Bias if provided must be constant.|
+|ai.onnx:ConvTranspose|Weight and bias must be constant.<br/>padding_type of SAME_UPPER/SAME_LOWER is not supported.<br/>kernel_shape must have default values.<br/>output_shape is not supported.<br/>output_padding must have default values.|
 |ai.onnx:Div||
 |ai.onnx:Gemm|Input B must be constant.|
 |ai.onnx:GlobalAveragePool|Only 2D Pool is supported currently. 3D and 5D support can be added if needed.|
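
A rough standalone sketch of the block/stride bookkeeping this patch adds: it restates, in plain C++ with no CoreML or Objective-C types, what GetMLMultiArrayCopyInfo computes and how the copy loops in CopyMLMultiArrayBuffer consume its output. All names in the sketch are made up for illustration and are not part of the change; it mirrors the "dim -2 non-contiguous" unit test case with shape {1, 1, 8, 8} and strides {128, 128, 16, 1}, which is expected to produce num_blocks = 8, block_size = 8, stride = 16.

// Illustrative sketch only; not a drop-in replacement for the code in model.mm.
#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

struct CopyInfo {
  int64_t num_blocks = 0;
  int64_t block_size = 0;
  int64_t stride = 0;
  bool valid = true;
};

// Derive copy info for an array that is contiguous or has at most one non-contiguous dimension.
CopyInfo GetCopyInfo(const std::vector<int64_t>& shape, const std::vector<int64_t>& strides) {
  CopyInfo info;
  const size_t rank = shape.size();
  int64_t data_elems = 1;   // real values
  int64_t total_elems = 1;  // values including the gaps introduced by a non-contiguous dimension

  for (size_t i = 1; i <= rank; ++i) {
    const int64_t this_stride = strides[rank - i];
    if (this_stride != total_elems) {  // entries of this dimension are not packed back-to-back
      if (info.block_size != 0) {
        info.valid = false;  // more than one non-contiguous dimension is not handled
        return info;
      }
      info.block_size = data_elems;
      info.stride = this_stride;
    }
    data_elems *= shape[rank - i];
    total_elems = shape[rank - i] * this_stride;
  }

  if (info.block_size == 0) {
    // fully contiguous: a single block covering all elements
    info.block_size = data_elems;
    info.stride = data_elems;
  }

  info.num_blocks = data_elems / info.block_size;
  return info;
}

int main() {
  // Matches the "dim -2 non-contiguous" unit test: 8 blocks of 8 elements, 16 elements apart.
  const CopyInfo info = GetCopyInfo({1, 1, 8, 8}, {128, 128, 16, 1});
  assert(info.valid && info.num_blocks == 8 && info.block_size == 8 && info.stride == 16);

  // The copy loops in CopyMLMultiArrayBuffer then walk the source in 'stride'-sized steps
  // and pack the destination contiguously, block_size elements at a time.
  std::vector<float> src(128, 0.0f), dst(64, 0.0f);
  const float* src_ptr = src.data();
  float* dst_ptr = dst.data();
  for (int64_t b = 0; b < info.num_blocks; ++b) {
    std::memcpy(dst_ptr, src_ptr, info.block_size * sizeof(float));
    src_ptr += info.stride;
    dst_ptr += info.block_size;
  }
  return 0;
}

The real implementation additionally enforces that stride >= block_size and that num_blocks * stride covers the whole allocation, which is what the ORT_ENFORCE checks in GetMLMultiArrayCopyInfo and the size validation in CopyMLMultiArrayBuffer verify.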