diff --git a/cmake/onnxruntime_providers_coreml.cmake b/cmake/onnxruntime_providers_coreml.cmake
index c9f35e5337f9b..8f3b1828e1c61 100644
--- a/cmake/onnxruntime_providers_coreml.cmake
+++ b/cmake/onnxruntime_providers_coreml.cmake
@@ -111,7 +111,7 @@ if(_enable_ML_PROGRAM)
   file(GLOB onnxruntime_providers_coreml_modelpackage_cc_srcs CONFIGURE_DEPENDS
     "${coremltools_SOURCE_DIR}/modelpackage/src/ModelPackage.?pp"
-    "${coremltools_SOURCE_DIR}/modelpackage/src/Utils/JsonMap.?pp"
+    "${coremltools_SOURCE_DIR}/modelpackage/src/utils/JsonMap.?pp"
   )
 
   set(coremltools_srcs
diff --git a/onnxruntime/core/providers/coreml/builders/coreml_spec.h b/onnxruntime/core/providers/coreml/builders/coreml_spec.h
index c9adba9e579d0..9448f1167990e 100644
--- a/onnxruntime/core/providers/coreml/builders/coreml_spec.h
+++ b/onnxruntime/core/providers/coreml/builders/coreml_spec.h
@@ -17,14 +17,19 @@
 #ifdef HAS_SHORTEN_64_TO_32
 #pragma GCC diagnostic ignored "-Wshorten-64-to-32"
 #endif
+#elif defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable : 4244)  // conversion from long to int
 #endif
 
 // Model.pb.h is generated in the build output directory from the CoreML protobuf files in
-// onnxruntime/core/providers/coreml/coremltools/mlmodel/format
+// <build output directory>/_deps/coremltools-src/mlmodel/format
 #include "coreml_proto/Model.pb.h"
 
 #if defined(__GNUC__)
 #pragma GCC diagnostic pop
+#elif defined(_MSC_VER)
+#pragma warning(pop)
 #endif
 
 namespace COREML_SPEC = CoreML::Specification;
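Note: the MSVC branch added above is the counterpart of the existing GCC/clang suppression; C4244 is MSVC's diagnostic for the same implicit 64-to-32-bit narrowing that -Wshorten-64-to-32 flags. A minimal sketch of the kind of conversion both toolchains warn about (illustrative only, not code from the patch; generated protobuf headers such as Model.pb.h contain conversions of this shape):

    #include <cstdint>

    int32_t Narrow(int64_t value) {
      // warns: conversion from 'int64_t' to 'int32_t', possible loss of data (C4244 / -Wshorten-64-to-32)
      return value;
    }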
Input:" << input_name + << ", shape: " << Shape2String(shape); + return false; + } } } @@ -125,7 +131,7 @@ std::unordered_set GetSupportedNodes(const GraphViewer& graph_viewe bool CheckIsConstantInitializer(const NodeArg& node_arg, const GraphViewer& graph_viewer, const logging::Logger& logger, std::string_view input_description) { - if (graph_viewer.GetConstantInitializer(node_arg.Name(), true) == nullptr) { + if (graph_viewer.GetConstantInitializer(node_arg.Name()) == nullptr) { LOGS(logger, VERBOSE) << input_description << " (NodeArg name: '" << node_arg.Name() << "') is not a constant initializer tensor"; return false; diff --git a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc index 2570e6d88ae0d..83a572f4b60fa 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc @@ -83,9 +83,14 @@ bool BaseOpBuilder::HasSupportedInputs(const Node& node, const OpBuilderInputPar } /* static */ -bool BaseOpBuilder::IsInput0Supported(const Node& node, const OpBuilderInputParams& /*input_params*/, - const logging::Logger& logger) { - const auto& input = *node.InputDefs()[0]; +bool BaseOpBuilder::IsInputFloat(const Node& node, size_t idx, const OpBuilderInputParams& /*input_params*/, + const logging::Logger& logger) { + if (idx >= node.InputDefs().size()) { + LOGS(logger, VERBOSE) << "Input index [" << idx << "] is out of range"; + return false; + } + + const auto& input = *node.InputDefs()[idx]; int32_t input_type = ONNX_NAMESPACE::TensorProto_DataType_UNDEFINED; @@ -102,7 +107,7 @@ bool BaseOpBuilder::HasSupportedInputsImpl(const Node& node, const OpBuilderInpu const logging::Logger& logger) const { // We only check the type of input 0 by default // specific op builder can override this - return IsInput0Supported(node, input_params, logger); + return IsInputFloat(node, 0, input_params, logger); } bool BaseOpBuilder::HasSupportedOpSet(const Node& node, const logging::Logger& logger) const { diff --git a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h index 06c4dd94ea30d..63f0b813d654c 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h +++ b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h @@ -28,9 +28,9 @@ class BaseOpBuilder : public IOpBuilder { void AddInitializersToSkip(ModelBuilder& /*model_builder*/, const Node& /*node*/) const override {} protected: - // check if the first input's data type is supported. 
diff --git a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h
index 06c4dd94ea30d..63f0b813d654c 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h
+++ b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h
@@ -28,9 +28,9 @@ class BaseOpBuilder : public IOpBuilder {
   void AddInitializersToSkip(ModelBuilder& /*model_builder*/, const Node& /*node*/) const override {}
 
  protected:
-  // check if the first input's data type is supported.
-  static bool IsInput0Supported(const Node& node, const OpBuilderInputParams& input_params,
-                                const logging::Logger& logger);
+  // check if the input's data type is supported. currently we only support float
+  static bool IsInputFloat(const Node& node, size_t idx, const OpBuilderInputParams& input_params,
+                           const logging::Logger& logger);
 
  private:
   virtual bool IsOpSupportedImpl(const Node& /*node*/, const OpBuilderInputParams& /*input_params*/,
diff --git a/onnxruntime/core/providers/coreml/builders/impl/binary_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/binary_op_builder.cc
index 6074fba1433d9..fb8e07633621f 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/binary_op_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/binary_op_builder.cc
@@ -5,6 +5,7 @@
 #include "core/providers/common.h"
 #include "core/providers/coreml/builders/helper.h"
 #include "core/providers/coreml/builders/impl/base_op_builder.h"
+#include "core/providers/coreml/builders/impl/builder_utils.h"
 #include "core/providers/coreml/builders/model_builder.h"
 #include "core/providers/coreml/builders/op_builder_factory.h"
 #include "core/providers/shared/utils/utils.h"
@@ -19,6 +20,8 @@ class BinaryOpBuilder : public BaseOpBuilder {
 
   bool HasSupportedInputsImpl(const Node& node, const OpBuilderInputParams& input_params,
                               const logging::Logger& logger) const override;
+
+  bool SupportsMLProgram() const override { return true; }
 };
 
 namespace {
@@ -57,38 +60,72 @@ Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
   const auto& op_type(node.OpType());
   const auto& input_defs(node.InputDefs());
 
-  std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);
-
-  if (op_type == "Add") {
-    // original mutable_add() has limited broadcasting support
-    // updated to use CoreML::AddBroadcastableLayerParams which has more general broadcasting support
-    if (CheckIfBothInputShapesMatch(node, logger)) {
-      layer->mutable_add();
-    } else {
-      layer->mutable_addbroadcastable();
-    }
-  } else if (op_type == "Mul") {
-    if (CheckIfBothInputShapesMatch(node, logger)) {
-      layer->mutable_multiply();
-    } else {
-      layer->mutable_multiplybroadcastable();
-    }
-  } else if (op_type == "Sub") {
-    layer->mutable_subtractbroadcastable();
-  } else if (op_type == "Div") {
-    layer->mutable_dividebroadcastable();
-  } else if (op_type == "Pow") {
-    layer->mutable_powbroadcastable();
-  } else {
-    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
-                           "BinaryOpBuilder::AddToModelBuilderImpl, unknown op: ", op_type);
-  }
-
-  *layer->mutable_input()->Add() = input_defs[0]->Name();
-  *layer->mutable_input()->Add() = input_defs[1]->Name();
-  *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();
+#if defined(COREML_ENABLE_MLPROGRAM)
+  if (model_builder.CreateMLProgram()) {
+    using namespace CoreML::Specification::MILSpec;
+
+    // https://apple.github.io/coremltools/source/coremltools.converters.mil.mil.ops.defs.html#module-coremltools.converters.mil.mil.ops.defs.iOS15.elementwise_binary
+    std::string_view coreml_op_type;
+    if (op_type == "Add") {
+      coreml_op_type = "add";
+    } else if (op_type == "Mul") {
+      coreml_op_type = "mul";
+    } else if (op_type == "Sub") {
+      coreml_op_type = "sub";
+    } else if (op_type == "Div") {
+      // we only support fp32 currently. when we add support for integers we need to check the type and use
+      // "floor_div" or "real_div" accordingly
+      coreml_op_type = "real_div";
+    } else if (op_type == "Pow") {
+      coreml_op_type = "pow";
+    } else {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "BinaryOpBuilder::AddToModelBuilderImpl, unexpected op: ", op_type);
+    }
+
+    std::unique_ptr<Operation> op = model_builder.CreateOperation(node, coreml_op_type);
+    AddOperationInput(*op, "x", input_defs[0]->Name());
+    AddOperationInput(*op, "y", input_defs[1]->Name());
+    AddOperationOutput(*op, *node.OutputDefs()[0]);
+
+    model_builder.AddOperation(std::move(op));
+  } else
+#endif  // defined (COREML_ENABLE_MLPROGRAM)
+  {
+    std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);
+
+    if (op_type == "Add") {
+      // original mutable_add() has limited broadcasting support
+      // updated to use CoreML::AddBroadcastableLayerParams which has more general broadcasting support
+      if (CheckIfBothInputShapesMatch(node, logger)) {
+        layer->mutable_add();
+      } else {
+        layer->mutable_addbroadcastable();
+      }
+    } else if (op_type == "Mul") {
+      if (CheckIfBothInputShapesMatch(node, logger)) {
+        layer->mutable_multiply();
+      } else {
+        layer->mutable_multiplybroadcastable();
+      }
+    } else if (op_type == "Sub") {
+      layer->mutable_subtractbroadcastable();
+    } else if (op_type == "Div") {
+      layer->mutable_dividebroadcastable();
+    } else if (op_type == "Pow") {
+      layer->mutable_powbroadcastable();
+    } else {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "BinaryOpBuilder::AddToModelBuilderImpl, unexpected op: ", op_type);
+    }
+
+    *layer->mutable_input()->Add() = input_defs[0]->Name();
+    *layer->mutable_input()->Add() = input_defs[1]->Name();
+    *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();
+
+    model_builder.AddLayer(std::move(layer));
+  }
 
-  model_builder.AddLayer(std::move(layer));
   return Status::OK();
 }
 
@@ -99,25 +136,11 @@ int BinaryOpBuilder::GetMinSupportedOpSet(const Node& /* node */) const {
 
 bool BinaryOpBuilder::HasSupportedInputsImpl(const Node& node, const OpBuilderInputParams& input_params,
                                              const logging::Logger& logger) const {
-  if (node.OpType() != "Pow") {
-    return IsInput0Supported(node, input_params, logger);
-  }
-
-  const auto& input_1 = *node.InputDefs()[0];
-  const auto& input_2 = *node.InputDefs()[1];
-
-  // Pow we only support both inputs as fp32 for now
-  int32_t input_type_1;
-  int32_t input_type_2;
-  if (!GetType(input_1, input_type_1, logger) ||
-      !GetType(input_2, input_type_2, logger)) {
-    return false;
-  }
-
-  if (input_type_1 != ONNX_NAMESPACE::TensorProto_DataType_FLOAT || input_type_1 != input_type_2) {
-    LOGS(logger, VERBOSE) << "Pow only supports fp32 inputs, actual input type"
-                          << ", Input type 1: " << input_type_1
-                          << ", Input type 2: " << input_type_2;
+  // Add/Sub/Mul/Div spec says inputs must be of the same type.
+  // Pow spec says inputs can be different types.
+  // We only support float for all of these inputs.
+  if (!IsInputFloat(node, 0, input_params, logger) ||
+      ((node.OpType() == "Pow") && !IsInputFloat(node, 1, input_params, logger))) {
     return false;
   }
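Note: the HasSupportedInputsImpl rewrite above composes per-index IsInputFloat checks instead of hand-rolling GetType comparisons. As a sketch of how another builder could reuse the same helper (the class name is hypothetical and assumes a BaseOpBuilder subclass; this is not part of the patch):

    // Sketch: a builder whose op requires both data inputs to be float.
    bool TwoInputOpBuilder::HasSupportedInputsImpl(const Node& node, const OpBuilderInputParams& input_params,
                                                   const logging::Logger& logger) const {
      return IsInputFloat(node, 0, input_params, logger) &&
             IsInputFloat(node, 1, input_params, logger);
    }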
diff --git a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc
index 710f596b2a562..cbea969904ed5 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc
@@ -7,6 +7,7 @@
 #include "core/framework/tensorprotoutils.h"
 #include "core/providers/coreml/builders/coreml_spec.h"
 #include "core/providers/coreml/builders/helper.h"
+#include "core/providers/coreml/builders/model_builder.h"
 #include "core/providers/shared/utils/utils.h"
 #include "core/optimizer/initializer.h"
 
@@ -132,6 +133,7 @@ void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const int64_t> data) {
+#if defined(COREML_ENABLE_MLPROGRAM)
+void AddPadTypeAndPads(COREML_SPEC::MILSpec::Operation& op, ModelBuilder& model_builder, std::string_view op_type,
+                       const NodeAttrHelper& helper, int num_spatial_dims) {
+  AutoPadType auto_pad_type = StringToAutoPadType(helper.Get("auto_pad", "NOTSET"));
+
+  switch (auto_pad_type) {
+    case AutoPadType::NOTSET: {
+      // use `pads` attribute.
+      auto onnx_pads = helper.GetInt64s("pads");  // 'pads' must be provided if auto_pad is NOTSET
+      if (onnx_pads) {
+        AddOperationInput(op, "pad_type",
+                          model_builder.AddScalarConstant(op_type, "pad_type", std::string("custom")));
+
+        // need to re-order from x1_start, x2_start..., x1_end, x2_end... to
+        // x1_start, x1_end, x2_start, x2_end,...
+        size_t num_pads = onnx_pads->size();
+        size_t num_dims = num_pads / 2;
+        std::vector<int64_t> reordered_pads(num_pads, 0);
+        for (size_t i = 0; i < num_pads; ++i) {
+          auto cur_dim = i % num_dims;
+          if (i < num_dims) {  // start values
+            reordered_pads[cur_dim * 2] = (*onnx_pads)[i];
+          } else {  // end values
+            reordered_pads[cur_dim * 2 + 1] = (*onnx_pads)[i];
+          }
+        }
+
+        AddOperationInput(op, "pad", model_builder.AddConstant(op_type, "pad", reordered_pads));
+
+        break;
+      }
+
+      // fall through if explicit pads were not provided as the default value for `pads` is all zeros,
+      // which is the same as 'valid' padding.
+      [[fallthrough]];
+    }
+    case AutoPadType::VALID:
+      AddOperationInput(op, "pad_type",
+                        model_builder.AddScalarConstant(op_type, "pad_type", std::string("valid")));
+
+      break;
+    case AutoPadType::SAME_UPPER:
+    case AutoPadType::SAME_LOWER: {
+      const auto pad_type = (auto_pad_type == AutoPadType::SAME_UPPER ? "same" : "same_lower");
+      AddOperationInput(op, "pad_type",
+                        model_builder.AddScalarConstant(op_type, "pad_type", std::string(pad_type)));
+
+      // despite what the spec says, a 'pad' input seems to be required.
+      // https://github.com/apple/coremltools/issues/2127
+      // Provide the default value as that's what coremltools does for conv/avg_pool/max_pool.
+      std::vector<int64_t> ignored_pads(num_spatial_dims * 2, 0);
+      AddOperationInput(op, "pad", model_builder.AddConstant(op_type, "pad", ignored_pads));
+
+      break;
+    }
+  }
+}
+#endif  // defined(COREML_ENABLE_MLPROGRAM)
 }  // namespace coreml
 }  // namespace onnxruntime
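Note: the reordering loop above converts ONNX's grouped pads layout ({x1_start, x2_start, ..., x1_end, x2_end, ...}) into CoreML's per-dimension layout ({x1_start, x1_end, x2_start, x2_end, ...}). A small self-contained sketch mirroring that loop for a 2D case (illustrative, not EP code):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      const std::vector<int64_t> onnx_pads{1, 2, 3, 4};  // top=1, left=2, bottom=3, right=4
      const size_t num_pads = onnx_pads.size();
      const size_t num_dims = num_pads / 2;

      std::vector<int64_t> reordered(num_pads, 0);
      for (size_t i = 0; i < num_pads; ++i) {
        const size_t cur_dim = i % num_dims;
        if (i < num_dims) {
          reordered[cur_dim * 2] = onnx_pads[i];      // start value
        } else {
          reordered[cur_dim * 2 + 1] = onnx_pads[i];  // end value
        }
      }

      // prints: 1 3 2 4  (i.e. {top, bottom, left, right})
      for (int64_t v : reordered) printf("%lld ", static_cast<long long>(v));
      printf("\n");
      return 0;
    }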
diff --git a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h
index 8126f0c126914..2804589065631 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h
+++ b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h
@@ -11,13 +11,15 @@
 #include "core/common/status.h"
 #include "core/graph/basic_types.h"
 #include "core/providers/common.h"
-
 #include "core/providers/coreml/builders/coreml_spec.h"
+#include "core/providers/shared/utils/utils.h"
 
 namespace onnxruntime {
 class NodeArg;
 
 namespace coreml {
+class ModelBuilder;
+
 // Try to see if we can map explicit padding to auto padding for Conv/Pool
 // Since usually use auto padding is more efficient
 Status HandleAutoPad(const std::vector<int64_t> input_shape,
@@ -45,6 +47,7 @@ void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const int32_t> data);
 void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const int64_t> data);
 
+#if defined(COREML_ENABLE_MLPROGRAM)
 //
 // MLProgram utils
 //
@@ -130,5 +133,17 @@ void AddOperationInput(COREML_SPEC::MILSpec::Operation& op,
 /// <param name="op">Operation to update.</param>
 /// <param name="output">NodeArg with details of output to add.</param>
 void AddOperationOutput(COREML_SPEC::MILSpec::Operation& op, const NodeArg& output);
+
+/// <summary>
+/// Add pad_type and pad values.
+/// </summary>
+/// <param name="op">Operator to update</param>
+/// <param name="model_builder">ModelBuilder to add constants with.</param>
+/// <param name="op_type">Operator type.</param>
+/// <param name="helper">Node attribute helper.</param>
+/// <param name="num_spatial_dims">Number of spatial dims in input. Generally rank - 2 (ignore N and C dims).</param>
+void AddPadTypeAndPads(COREML_SPEC::MILSpec::Operation& op, ModelBuilder& model_builder, std::string_view op_type,
+                       const NodeAttrHelper& helper, int num_spatial_dims);
+#endif  // defined(COREML_ENABLE_MLPROGRAM)
 }  // namespace coreml
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/coreml/builders/impl/clip_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/clip_op_builder.cc
index 9aca172abec98..41f4041ef1181 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/clip_op_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/clip_op_builder.cc
@@ -2,6 +2,7 @@
 // Licensed under the MIT License.
 
 #include "core/providers/coreml/builders/impl/base_op_builder.h"
+#include "core/providers/coreml/builders/impl/builder_utils.h"
 #include "core/providers/coreml/builders/model_builder.h"
 #include "core/providers/coreml/builders/op_builder_factory.h"
 #include "core/providers/shared/utils/utils.h"
@@ -17,11 +18,31 @@ class ClipOpBuilder : public BaseOpBuilder {
 
   bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
                          const logging::Logger& logger) const override;
+
+  bool SupportsMLProgram() const override { return true; }
 };
 
 void ClipOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const {
+  bool skip = true;
+
+  if (model_builder.CreateMLProgram()) {
+    float min, max;
+    ORT_IGNORE_RETURN_VALUE(GetClipMinMax(model_builder.GetGraphViewer(), node, min, max, model_builder.Logger()));
+
+    bool has_min = min != std::numeric_limits<float>::lowest();
+    bool has_max = max != std::numeric_limits<float>::max();
+    if (has_min && has_max && min == 0.f && max == 6.f) {
+      // relu6 - skip both
+    } else if (has_min && min == 0.f && !has_max) {
+      // relu - skip both
+    } else {
+      // clip - we will use both
+      skip = false;
+    }
+  }
+
   // Both min and max values will be injected into the layer, no need to add to the model
-  if (node.SinceVersion() >= 11) {
+  if (skip && node.SinceVersion() >= 11) {
     if (node.InputDefs().size() > 1)
       model_builder.AddInitializerToSkip(node.InputDefs()[1]->Name());
 
@@ -35,72 +56,126 @@ Status ClipOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
                                             const logging::Logger& logger) const {
   const auto& node_name = node.Name();
   const auto& input_name = node.InputDefs()[0]->Name();
-  const auto& output_name = node.OutputDefs()[0]->Name();
+  const auto& output = *node.OutputDefs()[0];
+  const auto& output_name = output.Name();
   float min, max;
   ORT_RETURN_IF_NOT(GetClipMinMax(model_builder.GetGraphViewer(), node, min, max, logger), "GetClipMinMax failed");
 
   bool has_min = min != std::numeric_limits<float>::lowest();
   bool has_max = max != std::numeric_limits<float>::max();
 
-  if (!has_min && !has_max) {
-    // Clip without min/max is an identity node
-    // In CoreML we don't have identity, use ActivationLinear instead
-    std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);
-    layer->mutable_activation()->mutable_linear()->set_alpha(1.0f);
-    *layer->mutable_input()->Add() = input_name;
-    *layer->mutable_output()->Add() = output_name;
-
-    model_builder.AddLayer(std::move(layer));
-  } else {
-    // The implementation of clip(min, max) is done by
-    // 1. Clipping at min -> max(input, min) is handled by
-    //    min_output = threshold(input, min)
-    // 2. Clipping at max -> min(min_output, max) is handled by
-    //    output = -1 * (threshold(-min_output, -max))
-
-    // Now we have at least one or min or max is not default value
-    // Clipping at max will need take the output of clipping at min, or the node input, if min value is default
-    // If max value is default, the output of clipping at min will be the output of the node
-    std::string min_output_name = output_name;
-    if (has_max) {
-      min_output_name = has_min
-                            ? model_builder.GetUniqueName(node_name + "min_output")
-                            : input_name;
+#if defined(COREML_ENABLE_MLPROGRAM)
+  if (model_builder.CreateMLProgram()) {
+    using namespace CoreML::Specification::MILSpec;
+
+    std::unique_ptr<Operation> op;
+    if (!has_min && !has_max) {
+      // Clip without min/max is an identity node.
+      op = model_builder.CreateOperation(node, "identity");
+      Operation& identity_op = *op;
+      AddOperationInput(identity_op, "x", input_name);
+    } else {
+      if (has_min && has_max && min == 0.f && max == 6.f) {
+        // https://apple.github.io/coremltools/source/coremltools.converters.mil.mil.ops.defs.html#coremltools.converters.mil.mil.ops.defs.iOS15.activation.relu6
+        op = model_builder.CreateOperation(node, "relu6");
+        Operation& relu6_op = *op;
+        AddOperationInput(relu6_op, "x", input_name);
+      } else if (has_min && min == 0.f && !has_max) {
+        // https://apple.github.io/coremltools/source/coremltools.converters.mil.mil.ops.defs.html#coremltools.converters.mil.mil.ops.defs.iOS15.activation.relu
+        op = model_builder.CreateOperation(node, "relu");
+        Operation& relu_op = *op;
+        AddOperationInput(relu_op, "x", input_name);
+      } else {
+        // https://apple.github.io/coremltools/source/coremltools.converters.mil.mil.ops.defs.html#coremltools.converters.mil.mil.ops.defs.iOS15.elementwise_unary.clip
+        op = model_builder.CreateOperation(node, "clip");
+
+        Operation& clip_op = *op;
+        AddOperationInput(clip_op, "x", input_name);
+
+        // if min and max were attributes we need to add initializers. otherwise we use the existing inputs
+        const bool min_max_attribs = node.SinceVersion() < 11;
+        std::string_view min_name = min_max_attribs ? model_builder.AddScalarConstant(clip_op.type(), "min", min)
+                                                    : node.InputDefs()[1]->Name();
+
+        AddOperationInput(clip_op, "alpha", min_name);
+
+        if (has_max) {
+          std::string_view max_name = min_max_attribs ? model_builder.AddScalarConstant(clip_op.type(), "max", max)
+                                                      : node.InputDefs()[2]->Name();
+          AddOperationInput(clip_op, "beta", max_name);
+        }
+      }
     }
 
-    // Handle clipping at min first
-    if (has_min) {
-      std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> min_layer = model_builder.CreateNNLayer(node, "_Clip_min");
-      if (min == 0.0f) {  // If min is 0. then this min will be handled by relu
-        min_layer->mutable_activation()->mutable_relu();
-      } else {  // otherwise, min will be handled by unary->threshold
-        min_layer->mutable_unary()->set_alpha(min);
-        min_layer->mutable_unary()->set_type(COREML_SPEC::UnaryFunctionLayerParams::THRESHOLD);
+    AddOperationOutput(*op, output);
+    model_builder.AddOperation(std::move(op));
+  } else
+#endif  // defined(COREML_ENABLE_MLPROGRAM)
+  {
+    // TODO: CoreML has a Clip layer for NeuralNetwork. Added in CoreML 4. We could potentially use that if available
+    // to simplify.
+    // https://apple.github.io/coremltools/mlmodel/Format/NeuralNetwork.html#cliplayerparams
+
+    if (!has_min && !has_max) {
+      // Clip without min/max is an identity node
+      // In CoreML we don't have identity, use ActivationLinear instead
+      std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);
+      layer->mutable_activation()->mutable_linear()->set_alpha(1.0f);
+      *layer->mutable_input()->Add() = input_name;
+      *layer->mutable_output()->Add() = output_name;
+
+      model_builder.AddLayer(std::move(layer));
+    } else {
+      // The implementation of clip(min, max) is done by
+      // 1. Clipping at min -> max(input, min) is handled by
+      //    min_output = threshold(input, min)
+      // 2. Clipping at max -> min(min_output, max) is handled by
+      //    output = -1 * (threshold(-min_output, -max))
+
+      // Now we have at least one or min or max is not default value
+      // Clipping at max will need take the output of clipping at min, or the node input, if min value is default
+      // If max value is default, the output of clipping at min will be the output of the node
+      std::string min_output_name = output_name;
+      if (has_max) {
+        min_output_name = has_min
+                              ? model_builder.GetUniqueName(node_name + "min_output")
+                              : input_name;
       }
 
-      *min_layer->mutable_input()->Add() = input_name;
-      *min_layer->mutable_output()->Add() = min_output_name;
-      model_builder.AddLayer(std::move(min_layer));
-    }
-
-    // Clipping at max is handled by -1 * (threshold (-min_output, -max))
-    if (has_max) {
-      const auto threshold_output_name = model_builder.GetUniqueName(MakeString(node_name, "threshold_output"));
-      {  // Add threshold layer, which is actually max( -1 * min_output, -max)
-        auto threshold_layer = model_builder.CreateNNLayer(node, "_Clip_max_threshold");
-        threshold_layer->mutable_unary()->set_alpha(-max);
-        threshold_layer->mutable_unary()->set_scale(-1.0f);
-        threshold_layer->mutable_unary()->set_type(COREML_SPEC::UnaryFunctionLayerParams::THRESHOLD);
-        *threshold_layer->mutable_input()->Add() = min_output_name;
-        *threshold_layer->mutable_output()->Add() = threshold_output_name;
-        model_builder.AddLayer(std::move(threshold_layer));
+      // Handle clipping at min first
+      if (has_min) {
+        std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> min_layer = model_builder.CreateNNLayer(node, "_Clip_min");
+        if (min == 0.0f) {  // If min is 0. then this min will be handled by relu
+          min_layer->mutable_activation()->mutable_relu();
+        } else {  // otherwise, min will be handled by unary->threshold
+          min_layer->mutable_unary()->set_alpha(min);
+          min_layer->mutable_unary()->set_type(COREML_SPEC::UnaryFunctionLayerParams::THRESHOLD);
+        }
+
+        *min_layer->mutable_input()->Add() = input_name;
+        *min_layer->mutable_output()->Add() = min_output_name;
+        model_builder.AddLayer(std::move(min_layer));
       }
-      {  // Add linear activation layer -1 * threshold_output
-        auto linear_layer = model_builder.CreateNNLayer(node, "_Clip_max_linear");
-        linear_layer->mutable_activation()->mutable_linear()->set_alpha(-1.0f);
-        *linear_layer->mutable_input()->Add() = threshold_output_name;
-        *linear_layer->mutable_output()->Add() = output_name;
-        model_builder.AddLayer(std::move(linear_layer));
+
+      // Clipping at max is handled by -1 * (threshold (-min_output, -max))
+      if (has_max) {
+        const auto threshold_output_name = model_builder.GetUniqueName(MakeString(node_name, "threshold_output"));
+        {  // Add threshold layer, which is actually max( -1 * min_output, -max)
+          auto threshold_layer = model_builder.CreateNNLayer(node, "_Clip_max_threshold");
+          threshold_layer->mutable_unary()->set_alpha(-max);
+          threshold_layer->mutable_unary()->set_scale(-1.0f);
+          threshold_layer->mutable_unary()->set_type(COREML_SPEC::UnaryFunctionLayerParams::THRESHOLD);
+          *threshold_layer->mutable_input()->Add() = min_output_name;
+          *threshold_layer->mutable_output()->Add() = threshold_output_name;
+          model_builder.AddLayer(std::move(threshold_layer));
+        }
+        {  // Add linear activation layer -1 * threshold_output
+          auto linear_layer = model_builder.CreateNNLayer(node, "_Clip_max_linear");
+          linear_layer->mutable_activation()->mutable_linear()->set_alpha(-1.0f);
+          *linear_layer->mutable_input()->Add() = threshold_output_name;
+          *linear_layer->mutable_output()->Add() = output_name;
+          model_builder.AddLayer(std::move(linear_layer));
+        }
       }
     }
   }
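Note: the ML Program path above maps ONNX Clip onto the cheapest CoreML op that matches the bounds. A compact sketch of that selection logic, extracted from the branches in the hunk (function name illustrative):

    #include <limits>
    #include <string_view>

    // Pick the CoreML op implementing Clip(x, min, max) for the given bounds,
    // mirroring the decision structure in ClipOpBuilder::AddToModelBuilderImpl.
    std::string_view SelectCoremlClipOp(float min, float max) {
      const bool has_min = min != std::numeric_limits<float>::lowest();
      const bool has_max = max != std::numeric_limits<float>::max();

      if (!has_min && !has_max) return "identity";  // no clipping at all
      if (has_min && has_max && min == 0.f && max == 6.f) return "relu6";
      if (has_min && min == 0.f && !has_max) return "relu";
      return "clip";  // general case: alpha=min, beta=max
    }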
diff --git a/onnxruntime/core/providers/coreml/builders/impl/conv_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/conv_op_builder.cc
index 05e43dbbd16af..38125957bf481 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/conv_op_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/conv_op_builder.cc
@@ -67,99 +67,25 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
       AddOperationInput(*conv_op, "bias", input_defs[2]->Name());
     }
 
-    // ONNX attributes. Add as inputs if specified/required
-    auto strides = helper.GetInt64s("strides");
-    auto dilations = helper.GetInt64s("dilations");
-    auto groups = helper.GetInt64("group");
-
     // we know this input has a valid shape due to the check in IsOpSupportedImpl. ignore N and C dims.
     const auto num_spatial_dims = input_defs[1]->Shape()->dim_size() - 2;
     const auto& op_type = conv_op->type();
 
-    if (strides) {
-      AddOperationInput(*conv_op, "strides", model_builder.AddConstant(op_type, "strides", *strides));
-    } else {
-      // spec says optional. testing suggests otherwise for at least the iOS15 target (CoreML5)
-      static const auto default_value = std::vector<int64_t>(num_spatial_dims, 1);
-      AddOperationInput(*conv_op, "strides", model_builder.AddConstant(op_type, "strides", default_value));
-    }
-
-    if (dilations) {
-      AddOperationInput(*conv_op, "dilations", model_builder.AddConstant(op_type, "dilations", *dilations));
-    } else {
-      // spec says optional. testing suggests otherwise for at least the iOS15 target (CoreML5)
-      static const auto default_value = std::vector<int64_t>(num_spatial_dims, 1);
-      AddOperationInput(*conv_op, "dilations", model_builder.AddConstant(op_type, "dilations", default_value));
-    }
+    // Spec says strides and dilations are optional, but reality is they're required for at least the iOS15 target
+    // (CoreML5).
+    const auto strides = helper.Get("strides", std::vector<int64_t>(num_spatial_dims, 1));
+    auto dilations = helper.Get("dilations", std::vector<int64_t>(num_spatial_dims, 1));
+    auto groups = helper.GetInt64("group");
+
+    AddOperationInput(*conv_op, "strides", model_builder.AddConstant(op_type, "strides", strides));
+    AddOperationInput(*conv_op, "dilations", model_builder.AddConstant(op_type, "dilations", dilations));
 
     if (groups) {
       AddOperationInput(*conv_op, "groups", model_builder.AddScalarConstant(op_type, "groups", *groups));
     }
 
-    AutoPadType auto_pad_type = StringToAutoPadType(helper.Get("auto_pad", "NOTSET"));
-
-    // pad type (string)
-    //   valid - no pads  (ONNX auto_pad VALID)
-    //   custom - pads input  (ONNX NOTSET)
-    //   same - inferred to be `d_out[i] = ceil(d_in[i] / strides[i])`  (assuming == ONNX SAME_UPPER)
-    //   same_lower - as per same but any extra rows/cols are added at top/left if padding is odd (ONNX SAME_LOWER)
-    //
-    // TODO: See if we want to update HandleAutoPad to support 1D (and 3D) so we can infer if an autopad value
-    //       can be used. TBD if that provides any performance benefit with ML Program though as CoreML could
-    //       potentially do that for us.
-    switch (auto_pad_type) {
-      case AutoPadType::NOTSET: {
-        // use `pads` attribute.
-        auto onnx_pads = helper.GetInt64s("pads");  // 'pads' must be provided if auto_pad is NOTSET
-        if (onnx_pads) {
-          AddOperationInput(*conv_op, "pad_type",
-                            model_builder.AddScalarConstant(op_type, "pad_type", std::string("custom")));
-
-          // need to re-order from x1_start, x2_start..., x1_end, x2_end... to
-          // x1_start, x1_end, x2_start, x2_end,...
-          size_t num_pads = onnx_pads->size();
-          size_t num_dims = num_pads / 2;
-          std::vector<int64_t> reordered_pads(num_pads, 0);
-          for (size_t i = 0; i < num_pads; ++i) {
-            auto cur_dim = i % num_dims;
-            if (i < num_dims) {  // start values
-              reordered_pads[cur_dim * 2] = (*onnx_pads)[i];
-            } else {  // end values
-              reordered_pads[cur_dim * 2 + 1] = (*onnx_pads)[i];
-            }
-          }
-
-          AddOperationInput(*conv_op, "pad", model_builder.AddConstant(op_type, "pad", reordered_pads));
-
-          break;
-        }
-
-        // in theory the pads may not be provided and in that case the default is no padding.
-        // as that is the same as 'valid', fall through
-        [[fallthrough]];
-      }
-      case AutoPadType::VALID:
-        AddOperationInput(*conv_op, "pad_type",
-                          model_builder.AddScalarConstant(op_type, "pad_type", std::string("valid")));
-
-        break;
-      case AutoPadType::SAME_UPPER:
-      case AutoPadType::SAME_LOWER: {
-        const auto pad_type = (auto_pad_type == AutoPadType::SAME_UPPER ? "same" : "same_lower");
-        AddOperationInput(*conv_op, "pad_type",
-                          model_builder.AddScalarConstant(op_type, "pad_type", std::string(pad_type)));
-
-        // despite what the spec says, a 'pad' input seems to be required.
-        // https://github.com/apple/coremltools/issues/2127
-        // provide the default value. passing in an empty vector also works. TBD what's better.
-        std::vector<int64_t> ignored_pads(num_spatial_dims * 2, 0);
-        AddOperationInput(*conv_op, "pad", model_builder.AddConstant(op_type, "pad", ignored_pads));
-
-        break;
-      }
-    }
+    AddPadTypeAndPads(*conv_op, model_builder, op_type, helper, num_spatial_dims);
 
-    // set output
     AddOperationOutput(*conv_op, *node.OutputDefs()[0]);
 
     model_builder.AddOperation(std::move(conv_op));
@@ -297,7 +223,7 @@ bool ConvOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
   const auto& input_defs = node.InputDefs();
 
   const auto& weight_name = input_defs[1]->Name();
-  const auto* weight = input_params.graph_viewer.GetConstantInitializer(weight_name, true);
+  const auto* weight = input_params.graph_viewer.GetConstantInitializer(weight_name);
 
 #if defined(COREML_ENABLE_MLPROGRAM)
   if (input_params.create_mlprogram) {
@@ -324,7 +250,7 @@ bool ConvOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
       return false;
     }
 
-    if (input_defs.size() > 2 && !input_params.graph_viewer.GetConstantInitializer(input_defs[2]->Name(), true)) {
+    if (input_defs.size() > 2 && !input_params.graph_viewer.GetConstantInitializer(input_defs[2]->Name())) {
      LOGS(logger, VERBOSE) << "The bias of Conv [" << name << "] must be a constant initializer";
       return false;
     }
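Note: for the 'same'/'same_lower' pad types removed above (now centralized in AddPadTypeAndPads), the output size is inferred as d_out[i] = ceil(d_in[i] / strides[i]). A small worked example of that padding arithmetic, following the ONNX SAME_UPPER/SAME_LOWER definitions (illustrative, not code from the patch):

    #include <cstdint>
    #include <cstdio>

    // Example: input 5, kernel 3, stride 2.
    // out = ceil(5 / 2) = 3; total_pad = (out - 1) * stride + kernel - in = 4 + 3 - 5 = 2.
    // If total_pad is odd, SAME_UPPER puts the extra pad at the end, SAME_LOWER at the start.
    int main() {
      const int64_t in = 5, kernel = 3, stride = 2;
      const int64_t out = (in + stride - 1) / stride;              // ceil(in / stride)
      const int64_t total_pad = (out - 1) * stride + kernel - in;  // may be negative; clamp at 0
      const int64_t pad = total_pad > 0 ? total_pad : 0;

      const int64_t pad_start = pad / 2;        // SAME_UPPER: start gets the smaller half
      const int64_t pad_end = pad - pad_start;
      printf("out=%lld pad_start=%lld pad_end=%lld\n",
             (long long)out, (long long)pad_start, (long long)pad_end);
      return 0;
    }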
diff --git a/onnxruntime/core/providers/coreml/builders/impl/gemm_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/gemm_op_builder.cc
index 48f77354d7c30..8daf64dc4a457 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/gemm_op_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/gemm_op_builder.cc
@@ -22,18 +22,51 @@ class GemmOpBuilder : public BaseOpBuilder {
   Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
                                const logging::Logger& logger) const override;
 
-  bool IsOpSupportedImpl(const Node& /* node */, const OpBuilderInputParams& /* input_params */,
-                         const logging::Logger& /* logger */) const override;
+  bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
+                         const logging::Logger& logger) const override;
+
+  bool SupportsMLProgram() const override { return true; }
 };
 
 void GemmOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const {
   const auto& op = node.OpType();
   const auto& input_defs(node.InputDefs());
 
-  // We have already embedded the weights (matrix B and C(if any)) into the coreml layer
-  // No need to copy them later to reduce memory consumption
-  model_builder.AddInitializerToSkip(input_defs[1]->Name());
-  if (op == "Gemm" && input_defs.size() > 2) {
-    model_builder.AddInitializerToSkip(input_defs[2]->Name());
+  const bool is_gemm = op == "Gemm";
+
+#if defined(COREML_ENABLE_MLPROGRAM)
+  if (model_builder.CreateMLProgram()) {
+    // we have to transpose the weight input of Gemm if transB is false, and potentially override the bias shape
+    if (is_gemm) {
+      NodeAttrHelper helper(node);
+      const auto transB = helper.Get("transB", 0);
+      if (transB == 0) {
+        model_builder.AddInitializerToSkip(input_defs[1]->Name());
+      }
+
+      if (input_defs.size() > 2) {
+        // ONNX spec requires B to be 2D and we required it to be a constant initializer so reading N this way is safe
+        // B is {K, N} by default, or {N, K} if transB is true
+        int N_dim = transB ? 0 : 1;
+        int64_t N = input_defs[1]->Shape()->dim().at(N_dim).dim_value();
+
+        const auto& bias_name = input_defs[2]->Name();
+        const auto& bias = *model_builder.GetConstantInitializer(bias_name);
+        if (bias.dims_size() != 1 || bias.dims(0) != N) {
+          // we have to override the shape/duplicate data to convert {}, {1} or {1, N} to 1D {N}
+          // when adding the Gemm operation so skip adding the original initializer
+          model_builder.AddInitializerToSkip(bias_name);
+        }
+      }
+    }
+  } else
+#endif  // defined(COREML_ENABLE_MLPROGRAM)
+  {
+    // We have already embedded the weights (matrix B and C(if any)) into the coreml layer
+    // No need to copy them later to reduce memory consumption
+    model_builder.AddInitializerToSkip(input_defs[1]->Name());
+    if (is_gemm && input_defs.size() > 2) {
+      model_builder.AddInitializerToSkip(input_defs[2]->Name());
+    }
   }
 }
 
@@ -57,54 +90,152 @@ static Status GetTensorFloatDataTransposed(const ONNX_NAMESPACE::TensorProto& tensor,
 }
 
 Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
-                                            const logging::Logger& /* logger */) const {
+                                            const logging::Logger& logger) const {
   std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);
 
   const auto& op_type = node.OpType();
   const auto& input_defs = node.InputDefs();
-  const auto& b_tensor = *model_builder.GetInitializerTensors().at(input_defs[1]->Name());
-  const auto& b_shape = b_tensor.dims();
-
-  auto* coreml_inner_product = layer->mutable_innerproduct();
-
-  // The coreml innerproduct weight (matrix B) is stored transposed
-  // - for MatMul and Gemm (transB = 0), the coreml weight is B'
-  // - for Gemm (transB = 1), the coreml weight is B
-  if (op_type == "MatMul") {
-    coreml_inner_product->set_inputchannels(b_shape[0]);
-    coreml_inner_product->set_outputchannels(b_shape[1]);
-    // Add weight (b of MatMul)
-    std::vector<float> b_transposed;
-    ORT_RETURN_IF_ERROR(GetTensorFloatDataTransposed(b_tensor, b_transposed));
-    CreateCoreMLWeight(*coreml_inner_product->mutable_weights(), b_transposed);
-  } else {  // Gemm
-    NodeAttrHelper helper(node);
-    const auto transB = helper.Get("transB", 0);
-    if (transB == 0) {
-      coreml_inner_product->set_inputchannels(b_shape[0]);
-      coreml_inner_product->set_outputchannels(b_shape[1]);
+  const auto& a = *input_defs[0];
+  const auto& b = *input_defs[1];
+  const auto* b_initializer = model_builder.GetConstantInitializer(b.Name());  // MLProgram MatMul may not be constant
+
+  const bool is_matmul = op_type == "MatMul";
+  const bool is_gemm = op_type == "Gemm";
+
+  NodeAttrHelper helper(node);
+  const auto transB = is_gemm ? helper.Get("transB", 0) : 0;
+
+  std::vector<int64_t> b_shape;
+  ORT_IGNORE_RETURN_VALUE(GetShape(b, b_shape, logger));
+  int64_t b0 = -1, b1 = -1;
+
+  // ML Program MatMul supports N-D input
+  if (model_builder.CreateMLProgram() && is_matmul) {
+    if (b_shape.size() == 1) {
+      // B is treated as {b_shape[0], 1} according to the numpy rules.
+      b0 = b_shape[0];
+      b1 = 1;
+    } else {
+      // last 2 dims are used
+      b0 = b_shape[b_shape.size() - 2];
+      b1 = b_shape[b_shape.size() - 1];
+    }
+  } else {
+    // we only support 2D input
+    b0 = b_shape[0];
+    b1 = b_shape[1];
+  }
+
+  // B is {K, N} in ONNX spec by default, or {N, K} in Gemm if transB is true
+  const auto K = transB ? b1 : b0;
+  const auto N = transB ? b0 : b1;
+
+#if defined(COREML_ENABLE_MLPROGRAM)
+  if (model_builder.CreateMLProgram()) {
+    using namespace CoreML::Specification::MILSpec;
+
+    if (is_gemm) {
+      // https://apple.github.io/coremltools/source/coremltools.converters.mil.mil.ops.defs.html#coremltools.converters.mil.mil.ops.defs.iOS15.linear.linear
+      auto gemm_op = model_builder.CreateOperation(node, "linear");
+      AddOperationInput(*gemm_op, "x", a.Name());
+
+      // CoreML takes weight input as {N, K} which is the reverse of ONNX.
+      // if transB is true the input weight is {N, K} so can be added directly.
+      if (transB) {
+        AddOperationInput(*gemm_op, "weight", b.Name());
+      } else {
+        // transpose from {K, N} to {N, K}
+        std::vector<float> weight_nk;
+        std::vector<int64_t> weight_nk_shape = {N, K};
+        ORT_RETURN_IF_ERROR(GetTensorFloatDataTransposed(*b_initializer, weight_nk));
+
+        AddOperationInput(*gemm_op, "weight",
+                          model_builder.AddConstant(gemm_op->type(), b.Name() + "_t", weight_nk, weight_nk_shape));
+      }
+
+      if (input_defs.size() == 3) {
+        const auto& bias_arg = *input_defs[2];
+        const auto& bias = *model_builder.GetConstantInitializer(bias_arg.Name());
+
+        // CoreML linear op requires bias to be 1D tensor of size N
+        if (bias.dims_size() == 1 && bias.dims().at(0) == N) {
+          // can use existing initializer
+          AddOperationInput(*gemm_op, "bias", bias_arg.Name());
+        } else {
+          Initializer unpacked_tensor(bias);
+          auto bias_data = unpacked_tensor.DataAsSpan<float>();
+          std::string_view bias_data_name;
+          if (bias_data.size() == 1) {
+            // expand scalar to N
+            std::vector<float> expanded_bias_data(N, bias_data[0]);
+            bias_data_name = model_builder.AddConstant(gemm_op->type(), "bias", expanded_bias_data);
+          } else {
+            // can use data as-is but need to adjust shape (inferred by AddConstant as {bias_data.size()})
+            bias_data_name = model_builder.AddConstant(gemm_op->type(), "bias", bias_data);
+          }
+
+          AddOperationInput(*gemm_op, "bias", bias_data_name);
+        }
+      }
+
+      AddOperationOutput(*gemm_op, *node.OutputDefs()[0]);
+      model_builder.AddOperation(std::move(gemm_op));
+    } else {
+      // CoreML implementation is the same as ONNX MatMul.
+      // https://apple.github.io/coremltools/source/coremltools.converters.mil.mil.ops.defs.html#coremltools.converters.mil.mil.ops.defs.iOS15.linear.matmul
+      auto matmul_op = model_builder.CreateOperation(node, "matmul");
+      AddOperationInput(*matmul_op, "x", a.Name());
+      AddOperationInput(*matmul_op, "y", b.Name());
+
+      // once again the spec lies and says transpose_y and transpose_x are optional...
+      auto false_value_name = model_builder.AddScalarConstant(matmul_op->type(), "false", false);
+      AddOperationInput(*matmul_op, "transpose_x", false_value_name);
+      AddOperationInput(*matmul_op, "transpose_y", false_value_name);
+
+      AddOperationOutput(*matmul_op, *node.OutputDefs()[0]);
+      model_builder.AddOperation(std::move(matmul_op));
+    }
+  } else
+#endif  // defined(COREML_ENABLE_MLPROGRAM)
+  {
+    auto* coreml_inner_product = layer->mutable_innerproduct();
+
+    *layer->mutable_input()->Add() = a.Name();
+
+    coreml_inner_product->set_inputchannels(K);
+    coreml_inner_product->set_outputchannels(N);
+
+    // CoreML takes weight input as {N, K} which is the reverse of ONNX.
+    // if Gemm's transB is true the input weight is {N, K} and can be added directly.
+    if (transB) {
+      ORT_RETURN_IF_ERROR(CreateCoreMLWeight(*coreml_inner_product->mutable_weights(), *b_initializer));
+    } else {
       std::vector<float> b_transposed;
-      ORT_RETURN_IF_ERROR(GetTensorFloatDataTransposed(b_tensor, b_transposed));
+      ORT_RETURN_IF_ERROR(GetTensorFloatDataTransposed(*b_initializer, b_transposed));
       CreateCoreMLWeight(*coreml_inner_product->mutable_weights(), b_transposed);
-    } else {
-      coreml_inner_product->set_inputchannels(b_shape[1]);
-      coreml_inner_product->set_outputchannels(b_shape[0]);
-      // Add weight (b of MatMul)
-      ORT_RETURN_IF_ERROR(CreateCoreMLWeight(*coreml_inner_product->mutable_weights(), b_tensor));
     }
 
-    // Add bias if present
-    if (input_defs.size() > 2) {
+    if (is_gemm && input_defs.size() > 2) {
+      // Add bias
       coreml_inner_product->set_hasbias(true);
-      const auto& bias_tensor = *model_builder.GetInitializerTensors().at(input_defs[2]->Name());
-      ORT_RETURN_IF_ERROR(CreateCoreMLWeight(*coreml_inner_product->mutable_bias(), bias_tensor));
+      const auto& bias_tensor = *model_builder.GetConstantInitializer(input_defs[2]->Name());
+
+      // if scalar, or single value, expand to 1D tensor of size N
+      // IsOpSupportedImpl enforces it's scalar, {1}, {N}, or {1, N}.
+      Initializer unpacked_tensor(bias_tensor);
+      auto bias_data = unpacked_tensor.DataAsSpan<float>();
+      if (bias_data.size() == 1 && N > 1) {
+        std::vector<float> expanded_bias_data(N, bias_data[0]);
+        CreateCoreMLWeight(*coreml_inner_product->mutable_bias(), expanded_bias_data);
+      } else {
+        CreateCoreMLWeight(*coreml_inner_product->mutable_bias(), bias_data);
+      }
     }
-  }
 
-  *layer->mutable_input()->Add() = input_defs[0]->Name();
-  *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();
+    *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();
+
+    model_builder.AddLayer(std::move(layer));
+  }
 
-  model_builder.AddLayer(std::move(layer));
   return Status::OK();
 }
 
@@ -112,98 +243,105 @@ bool GemmOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
                                       const logging::Logger& logger) const {
   const auto& op_type = node.OpType();
   const auto& input_defs(node.InputDefs());
+  const bool is_matmul = op_type == "MatMul";
+  const bool is_gemm = op_type == "Gemm";
+
   size_t a_idx = 0, b_idx = 1, c_idx = 2;  // A*B+C
 
-  const auto& initializers = input_params.graph_viewer.GetAllInitializedTensors();
-  if (!Contains(initializers, input_defs[b_idx]->Name())) {
-    LOGS(logger, VERBOSE) << "B of Gemm/Matmul must be an initializer tensor";
+  std::vector<int64_t> a_shape;
+  if (!GetShape(*input_defs[a_idx], a_shape, logger)) {
    return false;
  }
 
-  std::vector<int64_t> a_shape;
-  {
-    if (!GetShape(*input_defs[a_idx], a_shape, logger))
-      return false;
-
-    if (a_shape.size() != 2) {
-      LOGS(logger, VERBOSE) << "A must be 2D";
-      return false;
-    }
+  std::vector<int64_t> b_shape;
+  if (!GetShape(*input_defs[b_idx], b_shape, logger)) {
+    return false;
+  }
 
-    // TODO is it ok if the shape is dynamic and empty?
-    if (Product(a_shape) == 0) {
-      LOGS(logger, VERBOSE) << "A must be non-empty";
+  if (!input_params.graph_viewer.GetConstantInitializer(input_defs[b_idx]->Name())) {
+    if (input_params.create_mlprogram && is_matmul) {
+      // ML Program MatMul allows non-constant B input
+    } else {
+      LOGS(logger, VERBOSE) << op_type << " B input must be a constant initializer";
       return false;
     }
   }
 
-  std::vector<int64_t> b_shape;
-  {
-    if (!GetShape(*input_defs[b_idx], b_shape, logger))
-      return false;
-
-    if (b_shape.size() != 2) {
-      LOGS(logger, VERBOSE) << "B must be 2D";
-      return false;
-    }
+  if (is_matmul) {
+    if (input_params.create_mlprogram) {
+      // ML Program matmul op has numpy semantics the same as the ONNX spec so we can use directly
+    } else {
+      // we could potentially support 1D and 3D if required. beyond 3D the dims that merge diverge.
+      // https://github.com/apple/coremltools/blob/1931758aae383c83daddfc56f11a24a9d2bf4b87/coremltools/converters/onnx/_operators.py#L1607
+      // https://github.com/apple/coremltools/blob/1931758aae383c83daddfc56f11a24a9d2bf4b87/coremltools/converters/mil/backend/nn/op_mapping.py#L1374
+      // https://apple.github.io/coremltools/mlmodel/Format/NeuralNetwork.html#innerproductlayerparams
+      if (a_shape.size() != 2 || b_shape.size() != 2) {
+        LOGS(logger, VERBOSE) << "a and b inputs must be 2D.";
+        return false;
+      }
 
-    if (Product(b_shape) == 0) {
-      LOGS(logger, VERBOSE) << "B must be non-empty";
-      return false;
+      if (input_defs.size() > 2) {
+        LOGS(logger, VERBOSE) << "MatMul with C input is not supported";
+        return false;
+      }
     }
   }
 
-  if (op_type == "Gemm") {
+  if (is_gemm) {
+    // A and B are 2D due to the ONNX spec
     NodeAttrHelper helper(node);
     const auto transA = helper.Get("transA", 0);
     const auto transB = helper.Get("transB", 0);
     const auto alpha = helper.Get("alpha", 1.0f);
     const auto beta = helper.Get("beta", 1.0f);
+
+    // TODO: We can support transA, alpha and beta by using multiple layers/operations if needed.
     if (!(transA == 0 && alpha == 1.f && beta == 1.f)) {
-      LOGS(logger, VERBOSE) << "Only transA == 0, alpha == 1.0 "
-                            << "and beta == 1.0 is supported."
+      LOGS(logger, VERBOSE) << "Only support for transA == 0, alpha == 1.0 "
+                            << "and beta == 1.0 is currently implemented."
                             << " transA " << transA
                            << " alpha " << alpha
                            << " beta " << beta;
       return false;
    }
 
-    // C of Gemm
-    // For now we only support {n} or {1,n} tensor
     if (input_defs.size() == 3) {
-      if (!Contains(initializers, input_defs[c_idx]->Name())) {
-        LOGS(logger, VERBOSE) << "C of Gemm must be an initializer tensor";
+      if (!input_params.graph_viewer.GetConstantInitializer(input_defs[c_idx]->Name())) {
+        LOGS(logger, VERBOSE) << "C of Gemm must be a constant initializer";
        return false;
      }
 
       std::vector<int64_t> c_shape;
-      if (!GetShape(*input_defs[c_idx], c_shape, logger))
+      if (!GetShape(*input_defs[c_idx], c_shape, logger)) {
         return false;
+      }
 
-      size_t c_dim = c_shape.size();
+      // B is {K, N} in ONNX spec by default, or {N, K} in Gemm if transB is true
+      const auto N = transB ? b_shape[0] : b_shape[1];
 
-      if (c_dim == 0) {
-        LOGS(logger, VERBOSE) << "C of Gemm cannot be a scalar";
-        return false;
-      }
+      size_t c_rank = c_shape.size();
 
-      if (c_dim != 1) {
-        // If C is a (2+)d tensor, it must have the format {1, 1, ..., 1, n}
-        // where every except the last dimension should be 1
-        for (size_t i = 0; i < c_dim - 1; ++i) {
-          if (c_shape[i] != 1) {
-            LOGS(logger, VERBOSE) << "C of Gemm must be a vector or a tensor with only last dimension != 1";
-            return false;
+      // allowed: scalar, or 1D where the value is 1 or N, 2D with shape {1, N}
+      bool c_valid = false;
+      switch (c_rank) {
+        case 0:
+          c_valid = true;
+          break;
+        case 1:
+          if (c_shape[0] == 1 || c_shape[0] == N) {
+            c_valid = true;
          }
-        }
+          break;
+        case 2:
+          if (c_shape[0] == 1 && c_shape[1] == N) {
+            c_valid = true;
+          }
+          break;
      }
 
-      auto c_size = c_shape[c_dim - 1];
-      if (c_size != (transB == 0 ? b_shape[1] : b_shape[0])) {
-        LOGS(logger, VERBOSE) << "C of Gemm must be a vector of b_shape["
-                              << (transB == 0 ? "1" : "0") << "]"
-                              << " b_shape: [" << b_shape[0] << ", " << b_shape[1] << "]"
-                              << " c_size: " << c_size;
+      if (!c_valid) {
+        LOGS(logger, VERBOSE) << "Shape of C Gemm input must be {}, {1}, {N}, or {1, N}. N:" << N << " C shape:"
+                              << Shape2String(c_shape);
+
        return false;
      }
N:" << N << " C shape:" + << Shape2String(c_shape); return false; } diff --git a/onnxruntime/core/providers/coreml/builders/impl/pool_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/pool_op_builder.cc index 01aced739b36d..17910ba6fd486 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/pool_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/pool_op_builder.cc @@ -19,104 +19,176 @@ class PoolOpBuilder : public BaseOpBuilder { bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const override; + + bool SupportsMLProgram() const override { return true; } }; Status PoolOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const { - std::unique_ptr layer = model_builder.CreateNNLayer(node); - - auto* coreml_pool = layer->mutable_pooling(); const auto& op_type = node.OpType(); const auto& input_defs = node.InputDefs(); - bool is_global_pooling = false; - if (op_type == "GlobalAveragePool") { - is_global_pooling = true; - coreml_pool->set_type(COREML_SPEC::PoolingLayerParams_PoolingType_AVERAGE); - } else if (op_type == "GlobalMaxPool") { - is_global_pooling = true; - coreml_pool->set_type(COREML_SPEC::PoolingLayerParams_PoolingType_MAX); - } else if (op_type == "AveragePool") { - coreml_pool->set_type(COREML_SPEC::PoolingLayerParams_PoolingType_AVERAGE); - } else if (op_type == "MaxPool") { - coreml_pool->set_type(COREML_SPEC::PoolingLayerParams_PoolingType_MAX); - } else { - return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "PoolOpBuilder, unknown op: ", op_type); - } +#if defined(COREML_ENABLE_MLPROGRAM) + if (model_builder.CreateMLProgram()) { + using namespace CoreML::Specification::MILSpec; + + std::string_view coreml_op_type; + bool is_global = false; + bool is_avg_pool = false; + if (op_type == "GlobalAveragePool") { + // https://apple.github.io/coremltools/source/coremltools.converters.mil.mil.ops.defs.html#coremltools.converters.mil.mil.ops.defs.iOS15.reduction.reduce_mean + coreml_op_type = "reduce_mean"; + is_global = true; + } else if (op_type == "GlobalMaxPool") { + // https://apple.github.io/coremltools/source/coremltools.converters.mil.mil.ops.defs.html#coremltools.converters.mil.mil.ops.defs.iOS15.reduction.reduce_max + coreml_op_type = "reduce_max"; + is_global = true; + } else if (op_type == "AveragePool") { + // https://apple.github.io/coremltools/source/coremltools.converters.mil.mil.ops.defs.html#coremltools.converters.mil.mil.ops.defs.iOS15.pool.avg_pool + coreml_op_type = "avg_pool"; + is_avg_pool = true; + } else if (op_type == "MaxPool") { + // https://apple.github.io/coremltools/source/coremltools.converters.mil.mil.ops.defs.html#coremltools.converters.mil.mil.ops.defs.iOS15.pool.max_pool + coreml_op_type = "max_pool"; + } else { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "PoolOpBuilder, unexpected op: ", op_type); + } - if (is_global_pooling) { - coreml_pool->set_globalpooling(true); - coreml_pool->mutable_valid(); - } else { // AveragePool or MaxPool - NodeAttrHelper helper(node); - const auto kernel_shape = helper.Get("kernel_shape", std::vector{0, 0}); - const auto strides = helper.Get("strides", std::vector{1, 1}); - const auto onnx_pads = helper.Get("pads", std::vector{0, 0, 0, 0}); - - coreml_pool->add_kernelsize(kernel_shape[0]); - coreml_pool->add_kernelsize(kernel_shape[1]); - coreml_pool->add_stride(strides[0]); - coreml_pool->add_stride(strides[1]); - 
coreml_pool->set_avgpoolexcludepadding(helper.Get("count_include_pad", 0) == 0); - coreml_pool->set_globalpooling(false); - - // Add Padding - // Usually using autopadding is more efficient than using explicit padding - // Try to see if we can map explicit padding to auto padding - std::vector input_shape; - ORT_RETURN_IF_NOT(GetShape(*input_defs[0], input_shape, logger), "Cannot get shape"); - AutoPadType auto_pad_type; - ORT_RETURN_IF_ERROR(HandleAutoPad(input_shape, kernel_shape[0], kernel_shape[1], - onnx_pads, strides, {1, 1} /* dilations */, - StringToAutoPadType(helper.Get("auto_pad", "NOTSET")), - auto_pad_type)); - - if (AutoPadType::SAME_UPPER == auto_pad_type || AutoPadType::SAME_LOWER == auto_pad_type) { - auto* padding_type = coreml_pool->mutable_same(); - if (AutoPadType::SAME_LOWER == auto_pad_type) { // default is SAME_UPPER - padding_type->set_asymmetrymode(COREML_SPEC::SamePadding_SamePaddingMode_TOP_LEFT_HEAVY); + std::unique_ptr op = model_builder.CreateOperation(node, coreml_op_type); + + AddOperationInput(*op, "x", input_defs[0]->Name()); + + if (is_global) { + // keep N and C dims, reduce the rest with keepdims=True. equivalent to the ONNX Global*Pool ops. + std::vector axes{2, 3}; // we only support 4D input currently. + AddOperationInput(*op, "axes", model_builder.AddConstant(op->type(), "axes", axes)); + AddOperationInput(*op, "keep_dims", model_builder.AddScalarConstant(op->type(), "keep_dims", true)); + } else { + NodeAttrHelper helper(node); + constexpr int num_spatial_dims = 2; // we only support 4D. -2 for N and C dims. + + AddPadTypeAndPads(*op, model_builder, op->type(), helper, num_spatial_dims); + + const auto kernel_shape = helper.GetInt64s("kernel_shape"); // required + AddOperationInput(*op, "kernel_sizes", model_builder.AddConstant(op->type(), "kernel_sizes", *kernel_shape)); + + // in theory all these values are optional according to the CoreML spec but simpler to just provide default + // values as the actual model compilation tends to require them. 
+ const auto strides = helper.Get("strides", std::vector(num_spatial_dims, 1)); + const bool ceil_mode = helper.Get("ceil_mode", int64_t(0)); // convert int64_t to bool + + AddOperationInput(*op, "strides", model_builder.AddConstant(op->type(), "strides", strides)); + AddOperationInput(*op, "ceil_mode", model_builder.AddScalarConstant(op->type(), "ceil_mode", ceil_mode)); + + if (is_avg_pool) { + const bool count_exclude_pad = helper.Get("count_include_pad", int64_t(0)) == 0; + AddOperationInput(*op, "exclude_padding_from_average", + model_builder.AddScalarConstant(op->type(), "count_exclude_pad", count_exclude_pad)); } + } + + AddOperationOutput(*op, *node.OutputDefs()[0]); + model_builder.AddOperation(std::move(op)); + + } else +#endif // defined(COREML_ENABLE_MLPROGRAM) + { + std::unique_ptr layer = model_builder.CreateNNLayer(node); + + auto* coreml_pool = layer->mutable_pooling(); + + bool is_global_pooling = false; + if (op_type == "GlobalAveragePool") { + is_global_pooling = true; + coreml_pool->set_type(COREML_SPEC::PoolingLayerParams_PoolingType_AVERAGE); + } else if (op_type == "GlobalMaxPool") { + is_global_pooling = true; + coreml_pool->set_type(COREML_SPEC::PoolingLayerParams_PoolingType_MAX); + } else if (op_type == "AveragePool") { + coreml_pool->set_type(COREML_SPEC::PoolingLayerParams_PoolingType_AVERAGE); + } else if (op_type == "MaxPool") { + coreml_pool->set_type(COREML_SPEC::PoolingLayerParams_PoolingType_MAX); } else { - auto* padding_type = coreml_pool->mutable_valid(); - if (AutoPadType::NOTSET == auto_pad_type && onnx_pads != std::vector{0, 0, 0, 0}) { - // NOTSET is adding the explicit padding to the ValidPadding.paddingAmounts - auto* height_border = padding_type->mutable_paddingamounts()->add_borderamounts(); - height_border->set_startedgesize(onnx_pads[0]); - height_border->set_endedgesize(onnx_pads[2]); - auto* width_border = padding_type->mutable_paddingamounts()->add_borderamounts(); - width_border->set_startedgesize(onnx_pads[1]); - width_border->set_endedgesize(onnx_pads[3]); + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "PoolOpBuilder, unexpected op: ", op_type); + } + + if (is_global_pooling) { + coreml_pool->set_globalpooling(true); + coreml_pool->mutable_valid(); + } else { // AveragePool or MaxPool + NodeAttrHelper helper(node); + const auto kernel_shape = helper.Get("kernel_shape", std::vector{0, 0}); + const auto strides = helper.Get("strides", std::vector{1, 1}); + const auto onnx_pads = helper.Get("pads", std::vector{0, 0, 0, 0}); + + coreml_pool->add_kernelsize(kernel_shape[0]); + coreml_pool->add_kernelsize(kernel_shape[1]); + coreml_pool->add_stride(strides[0]); + coreml_pool->add_stride(strides[1]); + coreml_pool->set_avgpoolexcludepadding(helper.Get("count_include_pad", 0) == 0); + coreml_pool->set_globalpooling(false); + + // Add Padding + // Usually using autopadding is more efficient than using explicit padding + // Try to see if we can map explicit padding to auto padding + std::vector input_shape; + ORT_RETURN_IF_NOT(GetShape(*input_defs[0], input_shape, logger), "Cannot get shape"); + AutoPadType auto_pad_type; + ORT_RETURN_IF_ERROR(HandleAutoPad(input_shape, kernel_shape[0], kernel_shape[1], + onnx_pads, strides, {1, 1} /* dilations */, + StringToAutoPadType(helper.Get("auto_pad", "NOTSET")), + auto_pad_type)); + + if (AutoPadType::SAME_UPPER == auto_pad_type || AutoPadType::SAME_LOWER == auto_pad_type) { + auto* padding_type = coreml_pool->mutable_same(); + if (AutoPadType::SAME_LOWER == auto_pad_type) { // default is 
SAME_UPPER + padding_type->set_asymmetrymode(COREML_SPEC::SamePadding_SamePaddingMode_TOP_LEFT_HEAVY); + } + } else { + auto* padding_type = coreml_pool->mutable_valid(); + if (AutoPadType::NOTSET == auto_pad_type && onnx_pads != std::vector{0, 0, 0, 0}) { + // NOTSET is adding the explicit padding to the ValidPadding.paddingAmounts + auto* height_border = padding_type->mutable_paddingamounts()->add_borderamounts(); + height_border->set_startedgesize(onnx_pads[0]); + height_border->set_endedgesize(onnx_pads[2]); + auto* width_border = padding_type->mutable_paddingamounts()->add_borderamounts(); + width_border->set_startedgesize(onnx_pads[1]); + width_border->set_endedgesize(onnx_pads[3]); + } } } - } - *layer->mutable_input()->Add() = input_defs[0]->Name(); - *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name(); + *layer->mutable_input()->Add() = input_defs[0]->Name(); + *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name(); + + model_builder.AddLayer(std::move(layer)); + } - model_builder.AddLayer(std::move(layer)); return Status::OK(); } -bool PoolOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& /* input_params */, +bool PoolOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const { const auto& op_type = node.OpType(); const auto& input_defs = node.InputDefs(); std::vector input_shape; - if (!GetShape(*input_defs[0], input_shape, logger)) + if (!GetShape(*input_defs[0], input_shape, logger)) { return false; + } + // TODO: ML Program supports 3D and 5D. Add if we have a use case for that. const auto input_size = input_shape.size(); if (input_size != 4) { - LOGS(logger, VERBOSE) - << op_type << " only supports rank-4 tensor, input [" - << input_defs[0]->Name() << "] has actual dim count " << input_size; + LOGS(logger, VERBOSE) << op_type << " only supports rank-4 tensor, input [" + << input_defs[0]->Name() << "] has actual dim count " << input_size; return false; } if (op_type == "AveragePool" || op_type == "MaxPool") { NodeAttrHelper helper(node); + const auto storage_order = helper.Get("storage_order", 0); if (storage_order == 1) { LOGS(logger, VERBOSE) << "storage_order == 1 is not supported"; @@ -128,12 +200,14 @@ bool PoolOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputPara return false; } - // TODO, add support of the ceil_mode by adjusting the padding - // See https://stackoverflow.com/questions/59906456/in-pytorchs-maxpool2d-is-padding-added-depending-on-ceil-mode - // and https://github.com/apple/coremltools/blob/1931758aae383c83daddfc56f11a24a9d2bf4b87/coremltools/converters/mil/frontend/torch/ops.py#L621-L644 - if (helper.Get("ceil_mode", 0) == 1) { - LOGS(logger, VERBOSE) << "ceil_mode == 1 is not supported for pooling"; - return false; + if (!input_params.create_mlprogram) { + // TODO, add support of the ceil_mode by adjusting the padding + // See https://stackoverflow.com/questions/59906456/in-pytorchs-maxpool2d-is-padding-added-depending-on-ceil-mode + // and https://github.com/apple/coremltools/blob/1931758aae383c83daddfc56f11a24a9d2bf4b87/coremltools/converters/mil/frontend/torch/ops.py#L621-L644 + if (helper.Get("ceil_mode", 0) == 1) { + LOGS(logger, VERBOSE) << "ceil_mode == 1 is not supported for pooling"; + return false; + } } if (helper.Get("dilations", std::vector{1, 1}) != diff --git a/onnxruntime/core/providers/coreml/builders/impl/reshape_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/reshape_op_builder.cc index 
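Note: the GlobalAveragePool/GlobalMaxPool mapping above rests on the equivalence between a global pool and a spatial reduction that keeps the reduced dims ({N, C, H, W} -> {N, C, 1, 1}). A hedged standalone sketch of that equivalence on a plain NCHW buffer (purely illustrative, not EP code):

    #include <cstddef>
    #include <vector>

    // GlobalAveragePool over NCHW == reduce_mean over axes {2, 3} with keep_dims=true.
    std::vector<float> GlobalAveragePoolNCHW(const std::vector<float>& x,
                                             size_t N, size_t C, size_t H, size_t W) {
      std::vector<float> y(N * C);  // logically {N, C, 1, 1}
      for (size_t n = 0; n < N; ++n) {
        for (size_t c = 0; c < C; ++c) {
          double sum = 0.0;
          for (size_t i = 0; i < H * W; ++i) {
            sum += x[(n * C + c) * H * W + i];
          }
          y[n * C + c] = static_cast<float>(sum / static_cast<double>(H * W));
        }
      }
      return y;
    }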
7ae1746be3122..27d24d9c21893 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/reshape_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/reshape_op_builder.cc @@ -1,11 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include "core/framework/tensorprotoutils.h" #include "core/optimizer/initializer.h" -#include "core/providers/common.h" #include "core/providers/coreml/builders/helper.h" #include "core/providers/coreml/builders/impl/base_op_builder.h" +#include "core/providers/coreml/builders/impl/builder_utils.h" #include "core/providers/coreml/builders/model_builder.h" #include "core/providers/coreml/builders/op_builder_factory.h" #include "core/providers/coreml/shape_utils.h" @@ -26,34 +25,56 @@ class ReshapeOpBuilder : public BaseOpBuilder { // Reshape opset 4- uses attributes for new shape which we do not support for now int GetMinSupportedOpSet(const Node& /* node */) const override { return 5; } + + bool SupportsMLProgram() const override { return true; } }; void ReshapeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const { + // Skip the second input which is the new shape as we always have to create a new version as the CoreML rules + // are different from ONNX. model_builder.AddInitializerToSkip(node.InputDefs()[1]->Name()); } Status ReshapeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const { - std::unique_ptr layer = model_builder.CreateNNLayer(node); - const auto& input_defs = node.InputDefs(); - const auto& initializers(model_builder.GetInitializerTensors()); - const auto& target_shape_tensor = *initializers.at(input_defs[1]->Name()); - const int64_t* raw_target_shape = target_shape_tensor.int64_data().empty() - ? 
reinterpret_cast(target_shape_tensor.raw_data().data()) - : target_shape_tensor.int64_data().data(); - - const auto size = target_shape_tensor.dims()[0]; - TensorShapeVector target_shape{raw_target_shape, raw_target_shape + size}; std::vector input_shape; - ORT_RETURN_IF_NOT(GetStaticShape(*input_defs[0], input_shape, logger), "Cannot get shape"); - ReshapeHelper helper(TensorShape(input_shape), target_shape); - *layer->mutable_reshapestatic()->mutable_targetshape() = {target_shape.cbegin(), target_shape.cend()}; - *layer->mutable_input()->Add() = input_defs[0]->Name(); - *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name(); + ORT_RETURN_IF_NOT(GetStaticShape(*input_defs[0], input_shape, logger), "Cannot get shape of data"); + + const auto& data_name = input_defs[0]->Name(); + const auto& new_shape_name = input_defs[1]->Name(); + Initializer unpacked_tensor(*model_builder.GetConstantInitializer(new_shape_name)); + TensorShapeVector new_shape = ToShapeVector(unpacked_tensor.DataAsSpan()); + + // ReshapeHelper applies the ONNX rules to create the concrete output shape + ReshapeHelper helper(TensorShape(input_shape), new_shape); + +#if defined(COREML_ENABLE_MLPROGRAM) + if (model_builder.CreateMLProgram()) { + using namespace CoreML::Specification::MILSpec; - model_builder.AddLayer(std::move(layer)); + // https://apple.github.io/coremltools/source/coremltools.converters.mil.mil.ops.defs.html#coremltools.converters.mil.mil.ops.defs.iOS15.tensor_transformation.reshape + std::unique_ptr reshape_op = model_builder.CreateOperation(node, "reshape"); + + AddOperationInput(*reshape_op, "x", data_name); + AddOperationInput(*reshape_op, "shape", + model_builder.AddConstant(reshape_op->type(), "shape", ToConstSpan(new_shape))); + + AddOperationOutput(*reshape_op, *node.OutputDefs()[0]); + + model_builder.AddOperation(std::move(reshape_op)); + } else +#endif // defined(COREML_ENABLE_MLPROGRAM) + { + std::unique_ptr layer = model_builder.CreateNNLayer(node); + + *layer->mutable_reshapestatic()->mutable_targetshape() = {new_shape.cbegin(), new_shape.cend()}; + *layer->mutable_input()->Add() = data_name; + *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name(); + + model_builder.AddLayer(std::move(layer)); + } return Status::OK(); } @@ -61,14 +82,15 @@ bool ReshapeOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputP const logging::Logger& logger) const { const auto& input_defs = node.InputDefs(); const auto& new_shape_name = input_defs[1]->Name(); - const auto& initializers = input_params.graph_viewer.GetAllInitializedTensors(); - if (!Contains(initializers, new_shape_name)) { + const auto* new_shape_tensor = input_params.graph_viewer.GetConstantInitializer(new_shape_name); + if (!new_shape_tensor) { + // ONNX has different rules around how -1 and 0 values are used/combined, and + // we can't check if those can be translated to CoreML if the shape is unknown. 
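Aside: the ONNX rules referenced in the comment above are what ReshapeHelper resolves before the shape is handed to CoreML. A minimal sketch of those rules, assuming allowzero=0 semantics (illustrative only; the real logic lives in core/providers/cpu/tensor/reshape_helper.h, and the helper name here is hypothetical):

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Resolve ONNX Reshape's special values: 0 means "copy the corresponding
// input dimension", -1 means "infer this dimension from the element count".
std::vector<int64_t> ResolveReshape(const std::vector<int64_t>& input,
                                    std::vector<int64_t> requested) {
  int64_t input_size = 1;
  for (int64_t d : input) input_size *= d;

  int64_t known = 1;
  int unknown_dim = -1;
  for (size_t i = 0; i < requested.size(); ++i) {
    if (requested[i] == 0) requested[i] = input[i];  // 0 => keep input dim i
    if (requested[i] == -1) {
      unknown_dim = static_cast<int>(i);  // ONNX allows at most one -1
    } else {
      known *= requested[i];
    }
  }

  if (unknown_dim >= 0) {
    assert(known != 0 && input_size % known == 0);
    requested[unknown_dim] = input_size / known;  // infer the remaining dimension
  }
  return requested;
}

// e.g. ResolveReshape({2, 3, 4}, {0, -1}) yields {2, 12}
```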
LOGS(logger, VERBOSE) << "New shape of reshape must be a constant initializer"; return false; } - const auto& new_shape_tensor = *initializers.at(new_shape_name); - Initializer unpacked_tensor(new_shape_tensor); + Initializer unpacked_tensor(*new_shape_tensor); auto new_shape = unpacked_tensor.DataAsSpan(); if (new_shape.empty()) { LOGS(logger, VERBOSE) << "New shape of reshape cannot be empty"; @@ -84,7 +106,7 @@ bool ReshapeOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputP return false; } - // CoreML reshape doesn't support new shape with more than 5 dimensions + // CoreML reshape doesn't support new shape with more than 5 dimensions. if (new_shape.size() > 5) { LOGS(logger, VERBOSE) << "Reshape does not support new shape with rank greater than 5. Input shape: " << Shape2String(input_shape) << ", new shape: " << Shape2String(new_shape); @@ -93,7 +115,7 @@ bool ReshapeOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputP // CoreML reshape does not support 0 as dimension NodeAttrHelper helper(node); - const bool allow_zero = helper.Get("allowzero ", 0) == 1; + const bool allow_zero = helper.Get("allowzero", 0) == 1; if (allow_zero) { if (std::find(new_shape.begin(), new_shape.end(), int64_t{0}) != new_shape.end()) { LOGS(logger, VERBOSE) << "Reshape does not support new shape with 0 as dimension when allowzero is enabled. " diff --git a/onnxruntime/core/providers/coreml/builders/impl/resize_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/resize_op_builder.cc index 35dcde41a6bcf..6c2fcc2ace856 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/resize_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/resize_op_builder.cc @@ -98,7 +98,7 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const auto& input_defs = node.InputDefs(); const auto& initializers(model_builder.GetInitializerTensors()); - if (input_defs.size() == 3) { // use scales + if (input_defs.size() >= 3 && input_defs[2]->Exists()) { // use scales std::vector scales; ORT_RETURN_IF_NOT(GetResizeScales(initializers, node, scales, logger), "Error getting resize scales"); coreml_upsample->add_scalingfactor(static_cast(scales[2])); @@ -182,20 +182,24 @@ bool ResizeOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputPa return false; } + bool using_scales = input_defs.size() >= 3 && input_defs[2]->Exists(); // scales - if (input_defs.size() == 3 && !Contains(initializers, input_defs[2]->Name())) { - LOGS(logger, VERBOSE) << "Input scales of Resize must be known"; + if (using_scales && !input_params.graph_viewer.GetConstantInitializer(input_defs[2]->Name())) { + LOGS(logger, VERBOSE) << "scales input of Resize must be a constant initializer"; return false; } // sizes - if (input_defs.size() > 3 && !Contains(initializers, input_defs[3]->Name())) { - LOGS(logger, VERBOSE) << "Input sizes of Resize must be known"; + if (!using_scales && + (input_defs.size() < 4 || + !input_defs[3]->Exists() || + !input_params.graph_viewer.GetConstantInitializer(input_defs[3]->Name()))) { + LOGS(logger, VERBOSE) << "sizes input of Resize must be a constant initializer"; return false; } // We want to check if the scales or sizes are not trying to resize on N/C channels here - if (input_defs.size() == 3) { // we are using scales + if (using_scales) { std::vector scales; if (!GetResizeScales(initializers, node, scales, logger)) return false; diff --git a/onnxruntime/core/providers/coreml/builders/impl/slice_op_builder.cc 
b/onnxruntime/core/providers/coreml/builders/impl/slice_op_builder.cc index b716af738e1b1..39bfbfe5bba1f 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/slice_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/slice_op_builder.cc @@ -54,7 +54,7 @@ Status PrepareSliceComputeMetadataFromConstantInitializers(const Node& slice_nod return Status::OK(); } - const auto* tensor_proto = graph_viewer.GetConstantInitializer(input_defs[input_idx]->Name(), true); + const auto* tensor_proto = graph_viewer.GetConstantInitializer(input_defs[input_idx]->Name()); ORT_RETURN_IF_NOT(tensor_proto, "Failed to get constant initializer."); Initializer unpacked_tensor(*tensor_proto, graph_viewer.ModelPath()); const auto data_type = unpacked_tensor.data_type(); diff --git a/onnxruntime/core/providers/coreml/builders/impl/softmax_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/softmax_op_builder.cc index 266396a0fe90e..d6584124c6aba 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/softmax_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/softmax_op_builder.cc @@ -52,7 +52,7 @@ Status SoftmaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, target_shape.push_back(size_to_dimension); target_shape.push_back(size_from_dimension); - const auto reshape1_output_name = model_builder.GetUniqueName(MakeString(node.Name(), "reshape1_output")); + const auto reshape1_output_name = model_builder.GetUniqueName(node, "reshape1_output"); { // Add reshape layer auto reshape_layer = model_builder.CreateNNLayer(node, "_Softmax_reshape1"); *reshape_layer->mutable_reshapestatic()->mutable_targetshape() = {target_shape.cbegin(), target_shape.cend()}; @@ -60,7 +60,7 @@ Status SoftmaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, *reshape_layer->mutable_output()->Add() = reshape1_output_name; model_builder.AddLayer(std::move(reshape_layer)); } - const auto softmax_output_name = model_builder.GetUniqueName(MakeString(node.Name(), "softmax_output")); + const auto softmax_output_name = model_builder.GetUniqueName(node, "softmax_output"); { auto* coreml_softmaxnd = layer->mutable_softmaxnd(); coreml_softmaxnd->set_axis(-1); diff --git a/onnxruntime/core/providers/coreml/builders/model_builder.cc b/onnxruntime/core/providers/coreml/builders/model_builder.cc index daab36f7b933d..eb4723a3b9746 100644 --- a/onnxruntime/core/providers/coreml/builders/model_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/model_builder.cc @@ -144,14 +144,18 @@ void CopyOnnxTensorToCoreMLTensor(const ONNX_NAMESPACE::TensorProto& tensor_prot break; } case ONNX_NAMESPACE::TensorProto_DataType_INT64: { - // from: int64_data/raw, to: longints - if (has_raw_data) { - CopyRawDataToRepeatedField(tensor_proto, *tensor_value.mutable_longints()->mutable_values()); - - } else { - tensor_value.mutable_longints()->mutable_values()->CopyFrom(tensor_proto.int64_data()); - } - break; + // enable when this is proven to not be the case + ORT_THROW( + "INT64 is unexpected as CoreML uses 32-bit int for indices. 
" + "Most likely an initializer that should have been skipped was not."); + //// from: int64_data/raw, to: longints + // if (has_raw_data) { + // CopyRawDataToRepeatedField(tensor_proto, *tensor_value.mutable_longints()->mutable_values()); + + //} else { + // tensor_value.mutable_longints()->mutable_values()->CopyFrom(tensor_proto.int64_data()); + //} + // break; } case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16: { // from: int32_data/raw, to: bytes @@ -186,18 +190,22 @@ void CopyOnnxTensorToCoreMLTensor(const ONNX_NAMESPACE::TensorProto& tensor_prot break; } case ONNX_NAMESPACE::TensorProto_DataType_UINT64: { - // from: uint64_data/raw, to: longints - if (has_raw_data) { - CopyRawDataToRepeatedField(tensor_proto, *tensor_value.mutable_longints()->mutable_values()); - } else { - // TODO: Is this safe? Need to check the CopyFrom implementation. As it's a straight copy of bytes this - // hopefully can do it as one block instead of iterating and potentially doing a static_cast of each - // individual value. - tensor_value.mutable_longints()->mutable_values()->CopyFrom( - reinterpret_cast&>(tensor_proto.uint64_data())); - } - - break; + // enable when this is proven to not be the case + ORT_THROW( + "UINT64 is unexpected as CoreML uses 32-bit int for indices. " + "Most likely an initializer that should have been skipped was not."); + //// from: uint64_data/raw, to: longints + // if (has_raw_data) { + // CopyRawDataToRepeatedField(tensor_proto, *tensor_value.mutable_longints()->mutable_values()); + // } else { + // // TODO: Is this safe? Need to check the CopyFrom implementation. As it's a straight copy of bytes this + // // hopefully can do it as one block instead of iterating and potentially doing a static_cast of each + // // individual value. + // tensor_value.mutable_longints()->mutable_values()->CopyFrom( + // reinterpret_cast&>(tensor_proto.uint64_data())); + // } + + // break; } case ONNX_NAMESPACE::TensorProto_DataType_BOOL: { // from: int32_data/raw, to: bools @@ -392,23 +400,28 @@ std::string GetModelOutputPath(bool create_ml_program) { } // namespace ModelBuilder::ModelBuilder(const GraphViewer& graph_viewer, const logging::Logger& logger, - int32_t coreml_version, uint32_t coreml_flags) + int32_t coreml_version, uint32_t coreml_flags, + std::vector&& onnx_input_names, + std::vector&& onnx_output_names) : graph_viewer_(graph_viewer), logger_(logger), coreml_version_(coreml_version), coreml_flags_(coreml_flags), create_ml_program_((coreml_flags_ & COREML_FLAG_CREATE_MLPROGRAM) != 0), model_output_path_(GetModelOutputPath(create_ml_program_)), + onnx_input_names_(std::move(onnx_input_names)), + onnx_output_names_(std::move(onnx_output_names)), coreml_model_(std::make_unique()) { if (create_ml_program_) { #if defined(COREML_ENABLE_MLPROGRAM) coreml_model_->set_specificationversion(CoreMLSpecVersion()); MILSpec::Program& mlprogram = *coreml_model_->mutable_mlprogram(); - MILSpec::Function& main = (*mlprogram.mutable_functions())["main"]; + mlprogram.set_version(1); + mlprogram_main_fn_ = &(*mlprogram.mutable_functions())["main"]; const std::string coreml_opset = "CoreML" + std::to_string(CoreMLVersion()); - *main.mutable_opset() = coreml_opset; - mlprogram_main_ = &(*main.mutable_block_specializations())[coreml_opset]; + *mlprogram_main_fn_->mutable_opset() = coreml_opset; + mlprogram_main_block_ = &(*mlprogram_main_fn_->mutable_block_specializations())[coreml_opset]; // create the ModelPackage. this creates the output directory. 
mlpackage_ = std::make_unique(model_output_path_, /* create */ true); @@ -426,6 +439,8 @@ ModelBuilder::ModelBuilder(const GraphViewer& graph_viewer, const logging::Logge weights_file_writer_ = std::make_unique(weights_info->path() + "/weight.bin"); #else // should never happen due to handling in coreml_execution_provider.cc + // throw here so all other code in this class can assume create_ml_program_ is only ever true in a build + // where ML Program support is enabled. ORT_THROW("ML Program is not enabled in this build"); #endif } else { @@ -435,6 +450,28 @@ ModelBuilder::ModelBuilder(const GraphViewer& graph_viewer, const logging::Logge neural_network->set_arrayinputshapemapping( CoreML::Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING); } + + // populate names. + const auto& initializers = graph_viewer_.GetAllInitializedTensors(); + const auto& inputs = graph_viewer_.GetInputs(); + // rough guess to try and avoid reallocs. most nodes produce one output but some have more so allow for that. + // also need to convert attributes to constants so allow for that + unique_names_.reserve(initializers.size() + inputs.size() + size_t(graph_viewer_.NumberOfNodes() * 1.5)); + for (const auto& pair : initializers) { + unique_names_.insert(pair.first); + } + + for (const auto* input : inputs) { + unique_names_.insert(input->Name()); + } + + for (const auto& node : graph_viewer_.Nodes()) { + for (const auto& def : node.OutputDefs()) { + if (def->Exists()) { + unique_names_.insert(def->Name()); + } + } + } } ModelBuilder::~ModelBuilder() = default; @@ -455,11 +492,94 @@ void ModelBuilder::AddLayer(std::unique_ptr layer) { neural_network->mutable_layers()->AddAllocated(layer.release()); } -#if defined(COREML_ENABLE_MLPROGRAM) - /* * ML Program related helpers */ +#if defined(COREML_ENABLE_MLPROGRAM) +const std::string& ModelBuilder::GetSafeName(const std::string& name) { + // Check the name is valid according to the MILSpec rules + // `Identifiers, generally used for names and keys, must match the regular expression [A-Za-z\_][A-Za-z0-9\_@]*.` + // + // There is a secondary list of reserved words that the coremltools python uses, but it's not clear if those are + // required here, or if we will ever hit a model that uses one of them. Due to that, skip checking them for now as + // it adds cost and code complexity + // https://github.com/apple/coremltools/blob/8b37641f243b1a3e81452feea311c6e30dcc9287/coremltools/converters/mil/mil/passes/defs/preprocess.py#L151C1-L175C10 + // static InlinedHashSet reserved_names = + // {"any", "bool", "program", "func", "tensor", "list", "dict", "tuple", "true", "false", + // "string", "bf16", "fp16", "fp32", "fp64", "int8", "int16", "int32", "int64", + // "uint8", "uint16", "uint32", "uint64"}; + + // handle empty name. shouldn't happen but code below assumes name is not empty + if (name.empty()) { + return name; + } + + // We don't need '@' or '\' even though they're allowed. Optimize for a good name that does not need to be changed. + + // has been sanitized and changed already + const auto entry = values_to_rename_.find(name); + if (entry != values_to_rename_.end()) { + return entry->second; + } + + // Replace anything but a good char with '_'. 
If first char is 0-9 we prefix with '_'; + bool changed = false; + std::string result = name; + + if (std::isdigit(result[0])) { + changed = true; + result = '_' + name; + } + + for (char& c : result) { + if (!std::isalnum(c) && c != '_') { + changed = true; + c = '_'; + } + } + + if (!changed) { + return name; // return original as the return value is a reference that must remain valid + } + + return (values_to_rename_[name] = GetUniqueName(result)); +} + +void ModelBuilder::SanitizeNames() { + // ML Model level inputs/outputs + auto* desc = coreml_model_->mutable_description(); + for (auto& input : *desc->mutable_input()) { + input.set_name(GetSafeName(input.name())); + } + + for (auto& output : *desc->mutable_output()) { + output.set_name(GetSafeName(output.name())); + } + + // main function inputs/outputs. + for (auto& input : *mlprogram_main_fn_->mutable_inputs()) { + input.set_name(GetSafeName(input.name())); + } + + // outputs from block with operations for current coreml version + for (auto& output : *mlprogram_main_block_->mutable_outputs()) { + output = GetSafeName(output); + } + + // iterate operations changing input/output/node names + for (auto& op : *mlprogram_main_block_->mutable_operations()) { + for (auto& input : *op.mutable_inputs()) { + for (auto& arg : *input.second.mutable_arguments()) { + arg.set_name(GetSafeName(arg.name())); + } + } + + for (auto& output : *op.mutable_outputs()) { + output.set_name(GetSafeName(output.name())); + } + } +} + std::unique_ptr ModelBuilder::CreateOperation(const Node& node, std::string_view op_type, std::string_view suffix) { @@ -472,14 +592,9 @@ std::unique_ptr ModelBuilder::CreateOperation(c return op; } -void ModelBuilder::AddConstant(std::string_view name, const ONNX_NAMESPACE::TensorProto& initializer) { - MILSpec::Value coreml_tensor = OnnxTensorToCoreMLTensor(initializer, *weights_file_writer_); - AddConstantOperation(name, std::move(coreml_tensor)); -} - -void ModelBuilder::AddConstantOperation(std::string_view name, MILSpec::Value&& coreml_tensor) { +const std::string& ModelBuilder::AddConstantOperation(std::string_view name, MILSpec::Value&& coreml_tensor) { // Replicates coremltools/converters/mil/backend/mil/load.py translate_const logic - MILSpec::Operation& const_op = *mlprogram_main_->mutable_operations()->Add(); + MILSpec::Operation& const_op = *mlprogram_main_block_->mutable_operations()->Add(); const_op.set_type("const"); MILSpec::NamedValueType& output = *const_op.mutable_outputs()->Add(); @@ -487,58 +602,63 @@ void ModelBuilder::AddConstantOperation(std::string_view name, MILSpec::Value&& *output.mutable_type() = coreml_tensor.type(); auto& attr_map = *const_op.mutable_attributes(); - attr_map["name"] = CreateScalarTensorValue(std::string(name)); + // the operation name doesn't really matter as it isn't used elsewhere, so sanitize name now + attr_map["name"] = CreateScalarTensorValue(GetSafeName(output.name())); attr_map["val"] = std::move(coreml_tensor); + + return output.name(); } // Add operation to the Block for the main function in the ML Program void ModelBuilder::AddOperation(std::unique_ptr operation) { - mlprogram_main_->mutable_operations()->AddAllocated(operation.release()); + mlprogram_main_block_->mutable_operations()->AddAllocated(operation.release()); } -std::string ModelBuilder::AddTensorValueAsConstantOperation(std::string_view op_type, std::string_view value_type, - MILSpec::Value&& input_value) { +const std::string& ModelBuilder::AddTensorValueAsConstantOperation(std::string_view op_type, + 
std::string_view value_type, + MILSpec::Value&& input_value) { auto unique_value_name = GetUniqueName(MakeString(op_type, "_", value_type)); - AddConstantOperation(unique_value_name, std::move(input_value)); - return unique_value_name; + return AddConstantOperation(unique_value_name, std::move(input_value)); } template -std::string ModelBuilder::AddConstantImpl(std::string_view op_type, std::string_view value_type, gsl::span value, - std::optional> shape) { +std::string_view ModelBuilder::AddConstantImpl(std::string_view op_type, std::string_view value_type, + gsl::span value, + std::optional> shape) { // add specialization below static_assert(false_for_T, "Missing specialization for value type"); - return ""; // unreachable + + return "ModelBuilder::AddConstant error"; // unreachable } template <> -std::string ModelBuilder::AddConstantImpl(std::string_view op_type, std::string_view value_type, - gsl::span value, - std::optional> shape) { +std::string_view ModelBuilder::AddConstantImpl(std::string_view op_type, std::string_view value_type, + gsl::span value, + std::optional> shape) { auto input_value = CreateTensorValue(value, shape); return AddTensorValueAsConstantOperation(op_type, value_type, std::move(input_value)); } template <> -std::string ModelBuilder::AddConstantImpl(std::string_view op_type, std::string_view value_type, - gsl::span value, - std::optional> shape) { +std::string_view ModelBuilder::AddConstantImpl(std::string_view op_type, std::string_view value_type, + gsl::span value, + std::optional> shape) { auto input_value = CreateTensorValue(value, shape); // CoreML uses int32 return AddTensorValueAsConstantOperation(op_type, value_type, std::move(input_value)); } template <> -std::string ModelBuilder::AddConstantImpl(std::string_view op_type, std::string_view value_type, - gsl::span value, - std::optional> shape) { +std::string_view ModelBuilder::AddConstantImpl(std::string_view op_type, std::string_view value_type, + gsl::span value, + std::optional> shape) { auto input_value = CreateTensorValue(value, shape); return AddTensorValueAsConstantOperation(op_type, value_type, std::move(input_value)); } template <> -std::string ModelBuilder::AddConstantImpl(std::string_view op_type, std::string_view value_type, - gsl::span value, - std::optional> shape) { +std::string_view ModelBuilder::AddConstantImpl(std::string_view op_type, std::string_view value_type, + gsl::span value, + std::optional> shape) { auto input_value = CreateTensorValue(value, shape); return AddTensorValueAsConstantOperation(op_type, value_type, std::move(input_value)); } @@ -581,11 +701,13 @@ Status ModelBuilder::RegisterInitializers() { continue; } - if (create_ml_program_) { #if defined(COREML_ENABLE_MLPROGRAM) - AddConstant(name, tensor); + if (create_ml_program_) { + MILSpec::Value coreml_tensor = OnnxTensorToCoreMLTensor(tensor, *weights_file_writer_); + ORT_IGNORE_RETURN_VALUE(AddConstantOperation(name, std::move(coreml_tensor))); + } else #endif - } else { + { std::unique_ptr layer = std::make_unique(); layer->set_name(GetUniqueName("initializer_" + name)); @@ -616,32 +738,33 @@ Status ModelBuilder::RegisterModelInputOutput(const NodeArg& node_arg, bool is_i if (is_input) { // input should not be an initializer - if (Contains(GetInitializerTensors(), name)) + if (Contains(GetInitializerTensors(), name)) { return Status::OK(); + } // This input will not be used - if (Contains(skipped_inputs_, name)) + if (Contains(skipped_inputs_, name)) { return Status::OK(); + } } auto* model_description = 
coreml_model_->mutable_description(); - auto& input_output = is_input - ? *model_description->mutable_input()->Add() - : *model_description->mutable_output()->Add(); + auto& input_output = is_input ? *model_description->mutable_input()->Add() + : *model_description->mutable_output()->Add(); input_output.set_name(name); + auto* multi_array = input_output.mutable_type()->mutable_multiarraytype(); std::vector shape; - ORT_RETURN_IF_NOT(GetShape(node_arg, shape, logger_), - "Unable to get shape for ", input_output_type, ": ", name); + ORT_RETURN_IF_NOT(GetShape(node_arg, shape, logger_), "Unable to get shape for ", input_output_type, ": ", name); if (shape.empty()) { - // If we have an empty shape, this is a scalar input, - // Since all the input output of CoreML EP is MultiArray, we will make the scalar input output as a {1} MultiArray + // If we have an empty shape, this is a scalar + // Since all the input/output of CoreML EP is MultiArray, we will make the scalar input/output a {1} MultiArray shape.push_back(1); - // we need to change the shapes of these scalar outputs back to {} when CoreML EP returns these values to ORT + // we need to change the shapes of scalar outputs back to {} when CoreML EP returns values to ORT if (!is_input) { AddScalarOutput(name); } @@ -713,13 +836,20 @@ Status ModelBuilder::RegisterModelInputOutput(const NodeArg& node_arg, bool is_i #if defined(COREML_ENABLE_MLPROGRAM) if (create_ml_program_) { - MILSpec::Function& main = (*coreml_model_->mutable_mlprogram()->mutable_functions())["main"]; if (is_input) { - // the model inputs need to be wired up as args to the 'main' function - main.mutable_inputs()->Add(CreateNamedTensorValueType(node_arg)); + // the model inputs need to be wired up as args to the 'main' function. + auto tensor_value_type = CreateNamedTensorValueType(node_arg); + tensor_value_type.set_name(name); + if (node_arg.Shape()->dim_size() == 0) { + // update shape from {} to {1} (same change we made at the model input level above). + tensor_value_type.mutable_type()->mutable_tensortype()->set_rank(1); + tensor_value_type.mutable_type()->mutable_tensortype()->add_dimensions()->mutable_constant()->set_size(1); + } + + mlprogram_main_fn_->mutable_inputs()->Add(std::move(tensor_value_type)); } else { // the model outputs need to be set as outputs of the Block for the 'main' function - *mlprogram_main_->mutable_outputs()->Add() = node_arg.Name(); + *mlprogram_main_block_->mutable_outputs()->Add() = name; } } #endif // defined(COREML_ENABLE_MLPROGRAM) @@ -744,7 +874,7 @@ Status ModelBuilder::ProcessNodes() { // This shouldn't happen as this is called from CoreMLExecutionProvider::Compile and should only be processing // nodes that we said were supported and were returned from CoreMLExecutionProvider::GetCapability. return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, - "Node [", node.Name(), "], type [", node.OpType(), "] is not supported"); + "Node [", node.Name(), "], type [", node.OpType(), "] was not able to be processed"); } } @@ -767,6 +897,12 @@ Status ModelBuilder::CreateModel() { ORT_RETURN_IF_ERROR(ProcessNodes()); ORT_RETURN_IF_ERROR(RegisterModelOutputs()); +#if defined(COREML_ENABLE_MLPROGRAM) + if (create_ml_program_) { + SanitizeNames(); + } +#endif + return Status::OK(); } @@ -795,7 +931,7 @@ Status ModelBuilder::SaveModel() { #if defined(COREML_ENABLE_MLPROGRAM) // need to delete the ModelPackage instance for it to write out the manifest. clear out the other ML Program // related types as well. 
- mlprogram_main_ = nullptr; + mlprogram_main_block_ = nullptr; mlpackage_.reset(); weights_file_writer_.reset(); #endif @@ -804,11 +940,51 @@ Status ModelBuilder::SaveModel() { } Status ModelBuilder::LoadModel(std::unique_ptr& model) { - model = std::make_unique(model_output_path_, - std::move(input_output_info_), - std::move(scalar_outputs_), - std::move(int64_outputs_), - logger_, coreml_flags_); +#if defined(COREML_ENABLE_MLPROGRAM) + if (create_ml_program_) { + // we need to provide the sanitized names for model inputs/outputs so that info is captured. + // the input/output matching when we execute the model from the CoreML EP is based on order, so the change + // to the names doesn't matter for that. + auto get_sanitized_names = [this](std::vector&& names) -> std::vector { + std::vector output(std::move(names)); + + for (std::string& name : output) { + name = GetSafeName(name); + } + + return output; + }; + + // also need to update the keys in input_output_info_ + auto get_sanitized_io_info = [this](std::unordered_map&& info) { + std::unordered_map output; + output.reserve(info.size()); + + for (auto entry = info.begin(), end = info.end(); entry != end; ++entry) { + output.emplace(GetSafeName(entry->first), std::move(entry->second)); + } + + return output; + }; + + model = std::make_unique(model_output_path_, + get_sanitized_names(std::move(onnx_input_names_)), + get_sanitized_names(std::move(onnx_output_names_)), + get_sanitized_io_info(std::move(input_output_info_)), + std::move(scalar_outputs_), + std::move(int64_outputs_), + logger_, coreml_flags_); + } else +#endif + { + model = std::make_unique(model_output_path_, + std::move(onnx_input_names_), + std::move(onnx_output_names_), + std::move(input_output_info_), + std::move(scalar_outputs_), + std::move(int64_outputs_), + logger_, coreml_flags_); + } return model->LoadModel(); // load using CoreML API, including compilation } @@ -816,8 +992,11 @@ Status ModelBuilder::LoadModel(std::unique_ptr& model) { // static Status ModelBuilder::Build(const GraphViewer& graph_viewer, const logging::Logger& logger, int32_t coreml_version, uint32_t coreml_flags, + std::vector&& onnx_input_names, + std::vector&& onnx_output_names, std::unique_ptr& model) { - ModelBuilder builder(graph_viewer, logger, coreml_version, coreml_flags); + ModelBuilder builder(graph_viewer, logger, coreml_version, coreml_flags, + std::move(onnx_input_names), std::move(onnx_output_names)); ORT_RETURN_IF_ERROR(builder.CreateModel()); ORT_RETURN_IF_ERROR(builder.SaveModel()); @@ -847,20 +1026,31 @@ void ModelBuilder::AddInputToSkip(const std::string& input_name) { skipped_inputs_.insert(input_name); } -std::string ModelBuilder::GetUniqueName(std::string_view base_name) { +const std::string& ModelBuilder::GetUniqueName(const std::string& base_name) { + if (unique_names_.find(base_name) == unique_names_.end()) { + return *unique_names_.insert(base_name).first; + } + std::string unique_name; - do { - std::ostringstream os; - os << base_name << "_token_" << name_token_++; - unique_name = os.str(); - } while (Contains(unique_names_, unique_name)); + std::string suffix; + + // supports up to 1000 unique names without having to grow in the loop + unique_name.reserve(base_name.size() + 5); + unique_name = base_name; + + while (Contains(unique_names_, unique_name)) { + // assign followed by += to avoid creating temporary strings. 
+ unique_name = base_name; + unique_name += "__"; + unique_name += std::to_string(name_token_++); + } - return unique_name; + return *unique_names_.insert(unique_name).first; } -std::string ModelBuilder::GetUniqueName(const Node& node, std::string_view suffix) { +const std::string& ModelBuilder::GetUniqueName(const Node& node, std::string_view suffix) { if (node.Name().empty()) { - return GetUniqueName(MakeString("Node_", node.Index(), "_", node.OpType(), suffix)); + return GetUniqueName(MakeString(node.OpType(), "_", node.Index(), suffix)); } else { return GetUniqueName(node.Name() + std::string(suffix)); } diff --git a/onnxruntime/core/providers/coreml/builders/model_builder.h b/onnxruntime/core/providers/coreml/builders/model_builder.h index 961ba647257b5..8f85ab2c09e7c 100644 --- a/onnxruntime/core/providers/coreml/builders/model_builder.h +++ b/onnxruntime/core/providers/coreml/builders/model_builder.h @@ -25,17 +25,20 @@ namespace onnxruntime { namespace coreml { class IOpBuilder; -class Model; class ModelBuilder { private: ModelBuilder(const GraphViewer& graph_viewer, const logging::Logger& logger, - int32_t coreml_version, uint32_t coreml_flags); + int32_t coreml_version, uint32_t coreml_flags, + std::vector&& onnx_input_names, + std::vector&& onnx_output_names); public: // Create the CoreML model, serialize to disk, load and compile using the CoreML API and return in `model` static Status Build(const GraphViewer& graph_viewer, const logging::Logger& logger, int32_t coreml_version, uint32_t coreml_flags, + std::vector&& onnx_input_names, + std::vector&& onnx_output_names, std::unique_ptr& model); ~ModelBuilder(); @@ -101,8 +104,8 @@ class ModelBuilder { /// /// Unique name generated for value. template - std::string AddConstant(std::string_view op_type, std::string_view value_type, gsl::span value, - std::optional> shape = std::nullopt) { + std::string_view AddConstant(std::string_view op_type, std::string_view value_type, gsl::span value, + std::optional> shape = std::nullopt) { static_assert(std::is_same_v || std::is_same_v || std::is_same_v || @@ -113,8 +116,8 @@ class ModelBuilder { } template - std::string AddConstant(std::string_view op_type, std::string_view value_type, const std::vector& value, - std::optional> shape = std::nullopt) { + std::string_view AddConstant(std::string_view op_type, std::string_view value_type, const std::vector& value, + std::optional> shape = std::nullopt) { return AddConstant(op_type, value_type, AsSpan(value), shape); } @@ -122,17 +125,10 @@ class ModelBuilder { /// Add a scalar value as a 'const' operation. See AddConstant for details. 
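Usage sketch for the AddConstant/AddScalarConstant helpers declared here, mirroring how the pooling builder earlier in this diff calls them (the wrapper function name is hypothetical; AddOperationInput comes from builder_utils.h):

```cpp
// Attribute values become named 'const' operations; the returned value names
// (possibly sanitized later) are wired up as inputs of the consuming operation.
void AddPoolParams(ModelBuilder& model_builder, COREML_SPEC::MILSpec::Operation& op,
                   const std::vector<int64_t>& strides, bool ceil_mode) {
  AddOperationInput(op, "strides",
                    model_builder.AddConstant(op.type(), "strides", strides));
  AddOperationInput(op, "ceil_mode",
                    model_builder.AddScalarConstant(op.type(), "ceil_mode", ceil_mode));
}
```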
/// + template - std::string AddScalarConstant(std::string_view op_type, std::string_view value_type, const T& value) { + std::string_view AddScalarConstant(std::string_view op_type, std::string_view value_type, const T& value) { return AddConstant(op_type, value_type, AsSpan({value}), AsSpan({})); } - /// - /// Add an existing a constant ONNX initializer to the ML Program as a 'const' operation - /// - /// Initializer name - /// Initializer data - void AddConstant(std::string_view name, const ONNX_NAMESPACE::TensorProto& initializer); - // add the operation to the main function void AddOperation(std::unique_ptr operation); #endif @@ -149,18 +145,26 @@ class ModelBuilder { // be added to CoreML model, since CoreML does not like input unused void AddInputToSkip(const std::string& input_name); - std::string GetUniqueName(std::string_view base_name); - std::string GetUniqueName(const Node& node, std::string_view suffix); + const std::string& GetUniqueName(const std::string& base_name); + const std::string& GetUniqueName(const Node& node, std::string_view suffix); + + const logging::Logger& Logger() const { return logger_; } private: #if defined(COREML_ENABLE_MLPROGRAM) template - std::string AddConstantImpl(std::string_view op_type, std::string_view value_type, gsl::span value, - std::optional> shape = std::nullopt); - - void AddConstantOperation(std::string_view name, COREML_SPEC::MILSpec::Value&& initializer); - std::string AddTensorValueAsConstantOperation(std::string_view op_type, std::string_view value_type, - COREML_SPEC::MILSpec::Value&& input_value); + std::string_view AddConstantImpl(std::string_view op_type, std::string_view value_type, gsl::span value, + std::optional> shape = std::nullopt); + + // apply the CoreML naming rules and fix any invalid names. + const std::string& GetSafeName(const std::string& name); + // sanitize all the names in the ML Model + void SanitizeNames(); + + // add Value as a const operation. return value name in case sanitization changed it + const std::string& AddConstantOperation(std::string_view name, COREML_SPEC::MILSpec::Value&& initializer); + const std::string& AddTensorValueAsConstantOperation(std::string_view op_type, std::string_view value_type, + COREML_SPEC::MILSpec::Value&& input_value); #endif // Convert the ONNX model in graph_viewer_ to a CoreML::Specification::Model and serialize to disk. @@ -193,6 +197,9 @@ class ModelBuilder { const bool create_ml_program_; // ML Program (CoreML5, iOS 15+, macOS 12+) or NeuralNetwork (old) const std::string model_output_path_; // create_ml_program_ ? dir for mlpackage : filename for mlmodel + std::vector onnx_input_names_; + std::vector onnx_output_names_; + std::unique_ptr coreml_model_; std::unordered_set scalar_outputs_; std::unordered_set int64_outputs_; @@ -208,9 +215,19 @@ class ModelBuilder { // mlprogram_main_ is the main block of the CoreML ML Program. // It is set in CreateModel to the CoreML Model.mlprogram.functions['main'].block_specializations['CoreML'] // entry we create. - COREML_SPEC::MILSpec::Block* mlprogram_main_{nullptr}; + COREML_SPEC::MILSpec::Function* mlprogram_main_fn_{nullptr}; // Function that contains a Block with the operations + COREML_SPEC::MILSpec::Block* mlprogram_main_block_{nullptr}; // Block that all the operations are added to std::unique_ptr mlpackage_; std::unique_ptr weights_file_writer_; + + // Values must start with [a-zA-Z_] + // Additionally they can't be in a list of reserved words.
+ // If we need to sanitize an initializer name we do so during PreprocessInitializers and apply the change during + // RegisterInitializers. + // We also check inputs in AddOperation and apply the change there. + // This means an op builder author doesn't need to be aware of the renaming. + // https://github.com/apple/coremltools/blob/8b37641f243b1a3e81452feea311c6e30dcc9287/coremltools/converters/mil/mil/passes/defs/preprocess.py#L146-L149 + std::unordered_map values_to_rename_; #endif }; diff --git a/onnxruntime/core/providers/coreml/coreml_execution_provider.cc b/onnxruntime/core/providers/coreml/coreml_execution_provider.cc index 8e718da07703c..0ba715cc7c6d9 100644 --- a/onnxruntime/core/providers/coreml/coreml_execution_provider.cc +++ b/onnxruntime/core/providers/coreml/coreml_execution_provider.cc @@ -114,28 +114,27 @@ common::Status CoreMLExecutionProvider::Compile(const std::vector& node_compute_funcs) { for (const auto& fused_node_and_graph : fused_nodes_and_graphs) { Node& fused_node = fused_node_and_graph.fused_node; - const onnxruntime::GraphViewer& graph_viewer(fused_node_and_graph.filtered_graph); std::unique_ptr coreml_model; - ORT_RETURN_IF_ERROR(coreml::ModelBuilder::Build(graph_viewer, *GetLogger(), coreml_version_, coreml_flags_, - coreml_model)); - { - const auto& input_defs = fused_node.InputDefs(); - std::vector onnx_input_names(input_defs.size()); - for (size_t i = 0, end = input_defs.size(); i < end; ++i) { - onnx_input_names[i] = input_defs[i]->Name(); - } - coreml_model->SetOnnxInputs(std::move(onnx_input_names)); - } + auto get_names = [](const ConstPointerContainer>& args) -> std::vector { + std::vector names; + names.reserve(args.size()); - { - const auto& output_defs = fused_node.OutputDefs(); - std::vector onnx_output_names(output_defs.size()); - for (size_t i = 0, end = output_defs.size(); i < end; ++i) { - onnx_output_names[i] = output_defs[i]->Name(); - } - coreml_model->SetOnnxOutputs(std::move(onnx_output_names)); + for (const NodeArg* def : args) { + names.push_back(def->Name()); + } + + return names; + }; + + std::vector onnx_input_names = get_names(fused_node.InputDefs()); + std::vector onnx_output_names = get_names(fused_node.OutputDefs()); + + const onnxruntime::GraphViewer& graph_viewer(fused_node_and_graph.filtered_graph); + ORT_RETURN_IF_ERROR(coreml::ModelBuilder::Build(graph_viewer, *GetLogger(), coreml_version_, coreml_flags_, + std::move(onnx_input_names), std::move(onnx_output_names), + coreml_model)); } coreml_models_.emplace(fused_node.Name(), std::move(coreml_model)); @@ -153,13 +152,14 @@ common::Status CoreMLExecutionProvider::Compile(const std::vector(state); - const auto& model_inputs = model->GetOnnxInputs(); - const auto& model_outputs = model->GetOnnxOutputs(); + + // input/output names used by the CoreML model in the order that matches the fused_node InputDefs/OutputDefs + const auto& model_inputs = model->GetOrderedInputs(); + const auto& model_outputs = model->GetOrderedOutputs(); ORT_RETURN_IF_NOT(model_inputs.size() <= num_inputs, "Inconsistent input sizes"); ORT_RETURN_IF_NOT(model_outputs.size() == num_outputs, "Inconsistent output sizes"); @@ -182,28 +182,25 @@ common::Status CoreMLExecutionProvider::Compile(const std::vectorshape; - ORT_RETURN_IF(!coreml::IsStaticShape(inferred_shape) && coreml::DoesShapeSpecifyZeroElements(shape), - "Input (", input_name, ") has a dynamic shape (", coreml::Shape2String(inferred_shape), - ") but the runtime shape (", coreml::Shape2String(shape), - ") has zero elements. 
This is not supported by the CoreML EP."); - } + const auto& inferred_shape = input_info->shape; + ORT_RETURN_IF(!coreml::IsStaticShape(inferred_shape) && coreml::DoesShapeSpecifyZeroElements(shape), + "Input (", input_name, ") has a dynamic shape (", coreml::Shape2String(inferred_shape), + ") but the runtime shape (", coreml::Shape2String(shape), + ") has zero elements. This is not supported by the CoreML EP."); // If we have an empty shape, this is a scalar input, // Since all the input output of CoreML EP is MultiArray, we will make the scalar input as a {1} MultiArray - if (shape.empty()) + if (shape.empty()) { shape.push_back(1); + } // CoreML MLMultiArray API expect input to be non-const // https://developer.apple.com/documentation/coreml/mlmultiarray/2881219-initwithdatapointer?language=objc void* inputBuffer = const_cast(input_tensor.GetTensorRawData()); - inputs.emplace( - input_name, - coreml::OnnxTensorData{ - coreml::OnnxTensorInfo{tensor_info.GetElementType(), shape}, - inputBuffer, - }); + inputs.emplace(input_name, coreml::OnnxTensorData{ + coreml::OnnxTensorInfo{tensor_info.GetElementType(), shape}, + inputBuffer, + }); } // From this point we will need to take the exclusive lock on the model until the Predict is @@ -215,14 +212,13 @@ common::Status CoreMLExecutionProvider::Compile(const std::vector static_shape) -> void* { + [&ctx, &model_outputs](const std::string& name, + int32_t requested_onnx_tensor_element_type, + gsl::span static_shape) -> void* { const auto model_output_it = std::find(model_outputs.begin(), model_outputs.end(), name); ORT_ENFORCE(model_output_it != model_outputs.end(), "Failed to find CoreML model output name: ", name); - const auto output_idx = gsl::narrow_cast(std::distance(model_outputs.begin(), model_output_it)); + const auto output_idx = gsl::narrow_cast(std::distance(model_outputs.begin(), model_output_it)); auto output_tensor = ctx.GetOutput(output_idx, static_shape.data(), static_shape.size()); const auto type_and_shape_info = output_tensor.GetTensorTypeAndShapeInfo(); @@ -243,13 +239,15 @@ common::Status CoreMLExecutionProvider::Compile(const std::vectorIsScalarOutput(output_name)) + if (model->IsScalarOutput(output_name)) { output_shape.clear(); + } // Since CoreML EP only accepts int32 output type and onnx requires int64 output, // We are going to set the model output (from int32) ->int64 - if (model->IsInt64Output(output_name)) + if (model->IsInt64Output(output_name)) { output_type = ONNX_NAMESPACE::TensorProto_DataType_INT64; + } outputs.emplace(output_name, coreml::OnnxTensorInfo{output_type, output_shape}); } diff --git a/onnxruntime/core/providers/coreml/dump_mlprogram_model.py b/onnxruntime/core/providers/coreml/dump_mlprogram_model.py new file mode 100644 index 0000000000000..a3ceee70684dc --- /dev/null +++ b/onnxruntime/core/providers/coreml/dump_mlprogram_model.py @@ -0,0 +1,27 @@ +import sys + +import coremltools as ct + +if len(sys.argv) < 2: + print(f"Usage: {sys.argv[0]} ") + print("If generated by onnxruntime this will be /Data/com.microsoft.onnxruntime/model.mlmodel") + sys.exit(-1) + +model_path = sys.argv[1] +m = ct.models.MLModel(model_path) + +spec = m.get_spec() +print(spec) + +# Example code if you want to filter output or do more advanced things +# main = spec.mlProgram.functions["main"] +# block = main.block_specializations[main.opset] +# print(f"{len(block.operations)} operators") +# for op in block.operations: +# if op.type == 'const': +# if op.attributes["name"].immediateValue.tensor.strings.values[0] == 
"conv_0_pad_type_0": +# print(f"Conv pad_type={op.attributes['val'].immediateValue.tensor.strings.values}") +# +# if op.type == 'conv': +# #print(op) +# pass diff --git a/onnxruntime/core/providers/coreml/model/host_utils.h b/onnxruntime/core/providers/coreml/model/host_utils.h index 4f9a014c4d885..a9991ccb945ce 100644 --- a/onnxruntime/core/providers/coreml/model/host_utils.h +++ b/onnxruntime/core/providers/coreml/model/host_utils.h @@ -67,6 +67,12 @@ int CoreMLVersion(); // Get a temporary macOS/iOS temp file path std::string GetTemporaryFilePath(); +#if !defined(NDEBUG) && defined(__APPLE__) +// Override location the model is written to so that a) it's easily found and b) it is not automatically deleted +// when the EP exits. Use to debug the model that is generated. +// See onnxruntime/core/providers/coreml/dump_mlprogram_model.py for a script to dump the ML Program. +constexpr const char* kOverrideModelOutputDirectoryEnvVar = "ORT_COREML_EP_MODEL_DIR"; +#endif } // namespace util } // namespace coreml } // namespace onnxruntime diff --git a/onnxruntime/core/providers/coreml/model/host_utils.mm b/onnxruntime/core/providers/coreml/model/host_utils.mm index 0ae0cf8f0d207..5487ea35388f5 100644 --- a/onnxruntime/core/providers/coreml/model/host_utils.mm +++ b/onnxruntime/core/providers/coreml/model/host_utils.mm @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +#include "core/platform/env.h" #include "core/providers/coreml/model/host_utils.h" #import @@ -31,6 +32,15 @@ int32_t CoreMLVersion() { std::string GetTemporaryFilePath() { // Get temporary directory for user. NSURL* temporary_directory_url = [NSURL fileURLWithPath:NSTemporaryDirectory() isDirectory:YES]; + +#if !defined(NDEBUG) + std::string path_override = Env::Default().GetEnvironmentVar(kOverrideModelOutputDirectoryEnvVar); + if (!path_override.empty()) { + NSString* ns_path_override = [NSString stringWithUTF8String:path_override.c_str()]; + temporary_directory_url = [NSURL fileURLWithPath:ns_path_override isDirectory:YES]; + } +#endif + // Generate a Unique file name to use. NSString* temporary_filename = [[NSProcessInfo processInfo] globallyUniqueString]; diff --git a/onnxruntime/core/providers/coreml/model/model.h b/onnxruntime/core/providers/coreml/model/model.h index b940c4b768aec..e3cd43d786fc3 100644 --- a/onnxruntime/core/providers/coreml/model/model.h +++ b/onnxruntime/core/providers/coreml/model/model.h @@ -35,6 +35,8 @@ using GetOutputTensorMutableRawDataFn = std::function&& model_input_names, + std::vector&& model_output_names, std::unordered_map&& input_output_info, std::unordered_set&& scalar_outputs, std::unordered_set&& int64_outputs, @@ -60,12 +62,11 @@ class Model { // Mutex for exclusive lock to this model object OrtMutex& GetMutex() { return mutex_; } - // Input and output names in the onnx model's order - const std::vector& GetOnnxInputs() const { return onnx_inputs_; } - void SetOnnxInputs(std::vector&& inputs) { onnx_inputs_ = std::move(inputs); } - - const std::vector& GetOnnxOutputs() const { return onnx_outputs_; } - void SetOnnxOutputs(std::vector&& outputs) { onnx_outputs_ = std::move(outputs); } + // Input and output names in the ORT fused node's order. + // Names may have been adjusted from the originals due to CoreML naming rules. + // We do inputs/outputs based on order at the ONNX level so this doesn't matter. 
+ const std::vector& GetOrderedInputs() const { return model_input_names_; } + const std::vector& GetOrderedOutputs() const { return model_output_names_; } const OnnxTensorInfo* TryGetInputOutputInfo(const std::string& name) const { const auto info_it = input_output_info_.find(name); @@ -80,13 +81,13 @@ class Model { private: std::unique_ptr execution_; + std::vector model_input_names_; // input names in the order of the ORT fused node's inputs + std::vector model_output_names_; // output names in the order of the ORT fused node's outputs + std::unordered_map input_output_info_; std::unordered_set scalar_outputs_; std::unordered_set int64_outputs_; - std::vector onnx_inputs_; - std::vector onnx_outputs_; - OrtMutex mutex_; }; diff --git a/onnxruntime/core/providers/coreml/model/model.mm b/onnxruntime/core/providers/coreml/model/model.mm index d5cd70bff9479..1434043e064f4 100644 --- a/onnxruntime/core/providers/coreml/model/model.mm +++ b/onnxruntime/core/providers/coreml/model/model.mm @@ -19,6 +19,7 @@ #include "core/common/narrow.h" #include "core/common/span_utils.h" #include "core/graph/onnx_protobuf.h" +#include "core/platform/env.h" #include "core/providers/coreml/builders/helper.h" #include "core/providers/coreml/coreml_provider_factory.h" #include "core/providers/coreml/model/host_utils.h" @@ -287,6 +288,14 @@ - (void)cleanup { compiled_model_path_ = nil; } +#if !defined(NDEBUG) + std::string path_override = Env::Default().GetEnvironmentVar(util::kOverrideModelOutputDirectoryEnvVar); + if (!path_override.empty()) { + // don't cleanup + coreml_model_path_ = nil; + } +#endif + if (coreml_model_path_ != nil) { error = nil; [[NSFileManager defaultManager] removeItemAtPath:coreml_model_path_ error:&error]; @@ -487,12 +496,16 @@ Status Predict(const std::unordered_map& inputs, } Model::Model(const std::string& path, + std::vector&& model_input_names, + std::vector&& model_output_names, std::unordered_map&& input_output_info, std::unordered_set&& scalar_outputs, std::unordered_set&& int64_outputs, const logging::Logger& logger, uint32_t coreml_flags) : execution_(std::make_unique(path, logger, coreml_flags)), + model_input_names_(std::move(model_input_names)), + model_output_names_(std::move(model_output_names)), input_output_info_(std::move(input_output_info)), scalar_outputs_(std::move(scalar_outputs)), int64_outputs_(std::move(int64_outputs)) { diff --git a/onnxruntime/core/providers/coreml/model/model_stub.cc b/onnxruntime/core/providers/coreml/model/model_stub.cc index 087c9f8c05d5f..c6f2e7401ea1e 100644 --- a/onnxruntime/core/providers/coreml/model/model_stub.cc +++ b/onnxruntime/core/providers/coreml/model/model_stub.cc @@ -9,12 +9,16 @@ namespace coreml { class Execution {}; Model::Model(const std::string& /*path*/, + std::vector&& model_input_names, + std::vector&& model_output_names, std::unordered_map&& input_output_info, std::unordered_set&& scalar_outputs, std::unordered_set&& int64_outputs, const logging::Logger& /*logger*/, uint32_t /*coreml_flags*/) : execution_(std::make_unique()), + model_input_names_(std::move(model_input_names)), + model_output_names_(std::move(model_output_names)), input_output_info_(std::move(input_output_info)), scalar_outputs_(std::move(scalar_outputs)), int64_outputs_(std::move(int64_outputs)) { diff --git a/onnxruntime/core/providers/cpu/tensor/reshape_helper.h b/onnxruntime/core/providers/cpu/tensor/reshape_helper.h index 5961686674424..d7ceda16e61ea 100644 --- a/onnxruntime/core/providers/cpu/tensor/reshape_helper.h +++ 
b/onnxruntime/core/providers/cpu/tensor/reshape_helper.h @@ -37,12 +37,14 @@ class ReshapeHelper { if (unknown_dim != -1) { // calculate unknown dimension ORT_ENFORCE(size != 0 && (input_shape_size % size) == 0, - "The input tensor cannot be reshaped to the requested shape. Input shape:", input_shape, ", requested shape:", TensorShape(requested_shape)); + "The input tensor cannot be reshaped to the requested shape. Input shape:", input_shape, + ", requested shape:", TensorShape(requested_shape)); requested_shape[unknown_dim] = input_shape_size / size; } else { // check if the output shape is valid. ORT_ENFORCE(input_shape_size == size, - "The input tensor cannot be reshaped to the requested shape. Input shape:", input_shape, ", requested shape:", TensorShape(requested_shape)); + "The input tensor cannot be reshaped to the requested shape. Input shape:", input_shape, + ", requested shape:", TensorShape(requested_shape)); } } }; diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 7d4111e3b9c39..729ad34368453 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -64,17 +64,22 @@ namespace perftest { "\t Refer to onnxruntime_session_options_config_keys.h for valid keys and values. \n" "\t [Example] -C \"session.disable_cpu_ep_fallback|1 ep.context_enable|1\" \n" "\t-i: Specify EP specific runtime options as key value pairs. Different runtime options available are: \n" + "\t [Usage]: -e -i '| |'\n" + "\n" "\t [DML only] [performance_preference]: DML device performance preference, options: 'default', 'minimum_power', 'high_performance', \n" "\t [DML only] [device_filter]: DML device filter, options: 'any', 'gpu', 'npu', \n" "\t [DML only] [disable_metacommands]: Options: 'true', 'false', \n" "\t [DML only] [enable_dynamic_graph_fusion]: Options: 'true', 'false', \n" "\t [DML only] [enable_graph_serialization]: Options: 'true', 'false', \n" + "\n" "\t [OpenVINO only] [device_type]: Overrides the accelerator hardware type and precision with these values at runtime.\n" "\t [OpenVINO only] [device_id]: Selects a particular hardware device for inference.\n" "\t [OpenVINO only] [enable_npu_fast_compile]: Optionally enabled to speeds up the model's compilation on NPU device targets.\n" "\t [OpenVINO only] [num_of_threads]: Overrides the accelerator hardware type and precision with these values at runtime.\n" "\t [OpenVINO only] [cache_dir]: Explicitly specify the path to dump and load the blobs(Model caching) or cl_cache (Kernel Caching) files feature. If blob files are already present, it will be directly loaded.\n" "\t [OpenVINO only] [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU device(Reduces the CPU Utilization while using GPU) \n" + "\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU_FP32 enable_npu_fast_compile|true num_of_threads|5 enable_opencl_throttling|true cache_dir|\"\"\"\n" + "\n" "\t [QNN only] [backend_path]: QNN backend path. e.g '/folderpath/libQnnHtp.so', '/folderpath/libQnnCpu.so'.\n" "\t [QNN only] [profiling_level]: QNN profiling level, options: 'basic', 'detailed', default 'off'.\n" "\t [QNN only] [rpc_control_latency]: QNN rpc control latency. default to 10.\n" @@ -89,9 +94,8 @@ namespace perftest { "\t [QNN only] [htp_arch]: The minimum HTP architecture. The driver will use ops compatible with this architecture. \n" "\t Options are '0', '68', '69', '73', '75'. Defaults to '0' (none). 
\n" "\t [QNN only] [device_id]: The ID of the device to use when setting 'htp_arch'. Defaults to '0' (for single device). \n" - "\t [Usage]: -e -i '| |'\n\n" - "\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU_FP32 enable_npu_fast_compile|true num_of_threads|5 enable_opencl_throttling|true cache_dir|\"\"\"\n" - "\t [Example] [For QNN EP] -e qnn -i \"backend_path|/folderpath/libQnnCpu.so\" \n\n" + "\t [Example] [For QNN EP] -e qnn -i \"backend_path|/folderpath/libQnnCpu.so\" \n" + "\n" "\t [TensorRT only] [trt_max_partition_iterations]: Maximum iterations for TensorRT parser to get capability.\n" "\t [TensorRT only] [trt_min_subgraph_size]: Minimum size of TensorRT subgraphs.\n" "\t [TensorRT only] [trt_max_workspace_size]: Set TensorRT maximum workspace size in byte.\n" @@ -108,20 +112,23 @@ namespace perftest { "\t [TensorRT only] [trt_force_sequential_engine_build]: Force TensorRT engines to be built sequentially.\n" "\t [TensorRT only] [trt_context_memory_sharing_enable]: Enable TensorRT context memory sharing between subgraphs.\n" "\t [TensorRT only] [trt_layer_norm_fp32_fallback]: Force Pow + Reduce ops in layer norm to run in FP32 to avoid overflow.\n" - "\t [Usage]: -e -i '| |'\n\n" - "\t [Example] [For TensorRT EP] -e tensorrt -i 'trt_fp16_enable|true trt_int8_enable|true trt_int8_calibration_table_name|calibration.flatbuffers trt_int8_use_native_calibration_table|false trt_force_sequential_engine_build|false'\n" + "\t [Example] [For TensorRT EP] -e tensorrt -i 'trt_fp16_enable|true trt_int8_enable|true trt_int8_calibration_table_name|calibration.flatbuffers trt_int8_use_native_calibration_table|false trt_force_sequential_engine_build|false'\n" + "\n" "\t [NNAPI only] [NNAPI_FLAG_USE_FP16]: Use fp16 relaxation in NNAPI EP..\n" "\t [NNAPI only] [NNAPI_FLAG_USE_NCHW]: Use the NCHW layout in NNAPI EP.\n" "\t [NNAPI only] [NNAPI_FLAG_CPU_DISABLED]: Prevent NNAPI from using CPU devices.\n" "\t [NNAPI only] [NNAPI_FLAG_CPU_ONLY]: Using CPU only in NNAPI EP.\n" - "\t [Usage]: -e -i ' '\n\n" - "\t [Example] [For NNAPI EP] -e nnapi -i \" NNAPI_FLAG_USE_FP16 NNAPI_FLAG_USE_NCHW NNAPI_FLAG_CPU_DISABLED \"\n" + "\t [Example] [For NNAPI EP] -e nnapi -i \"NNAPI_FLAG_USE_FP16 NNAPI_FLAG_USE_NCHW NNAPI_FLAG_CPU_DISABLED\"\n" + "\n" + "\t [CoreML only] [COREML_FLAG_CREATE_MLPROGRAM]: Create an ML Program model instead of Neural Network.\n" + "\t [Example] [For CoreML EP] -e coreml -i \"COREML_FLAG_CREATE_MLPROGRAM\"\n" + "\n" "\t [SNPE only] [runtime]: SNPE runtime, options: 'CPU', 'GPU', 'GPU_FLOAT16', 'DSP', 'AIP_FIXED_TF'. \n" "\t [SNPE only] [priority]: execution priority, options: 'low', 'normal'. \n" "\t [SNPE only] [buffer_type]: options: 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. default: ITENSOR'. \n" "\t [SNPE only] [enable_init_cache]: enable SNPE init caching feature, set to 1 to enabled it. Disabled by default. 
\n" - "\t [Usage]: -e -i '| |' \n\n" - "\t [Example] [For SNPE EP] -e snpe -i \"runtime|CPU priority|low\" \n\n" + "\t [Example] [For SNPE EP] -e snpe -i \"runtime|CPU priority|low\" \n\n" + "\n" "\t-T [Set intra op thread affinities]: Specify intra op thread affinity string\n" "\t [Example]: -T 1,2;3,4;5,6 or -T 1-2;3-4;5-6 \n" "\t\t Use semicolon to separate configuration between threads.\n" diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 1934314b8ce43..9679ca6159464 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -468,7 +468,10 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); nnapi_flags |= NNAPI_FLAG_CPU_ONLY; } else if (key.empty()) { } else { - ORT_THROW("[ERROR] [NNAPI] wrong key type entered. Choose from the following runtime key options that are available for NNAPI. ['NNAPI_FLAG_USE_FP16', 'NNAPI_FLAG_USE_NCHW', 'NNAPI_FLAG_CPU_DISABLED', 'NNAPI_FLAG_CPU_ONLY'] \n"); + ORT_THROW( + "[ERROR] [NNAPI] wrong key type entered. Choose from the following runtime key options " + "that are available for NNAPI. " + "['NNAPI_FLAG_USE_FP16', 'NNAPI_FLAG_USE_NCHW', 'NNAPI_FLAG_CPU_DISABLED', 'NNAPI_FLAG_CPU_ONLY'] \n"); } } Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Nnapi(session_options, nnapi_flags)); @@ -476,10 +479,31 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); ORT_THROW("NNAPI is not supported in this build\n"); #endif } else if (provider_name_ == onnxruntime::kCoreMLExecutionProvider) { +#ifdef __APPLE__ #ifdef USE_COREML - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CoreML(session_options, 0)); + uint32_t coreml_flags = 0; + std::string ov_string = performance_test_config.run_config.ep_runtime_config_string; + std::istringstream ss(ov_string); + + std::string key; + while (ss >> key) { + if (key == "COREML_FLAG_CREATE_MLPROGRAM") { + coreml_flags |= COREML_FLAG_CREATE_MLPROGRAM; + std::cout << "Enabling ML Program.\n"; + } else if (key.empty()) { + } else { + ORT_THROW( + "[ERROR] [CoreML] wrong key type entered. Choose from the following runtime key options " + "that are available for CoreML. ['COREML_FLAG_CREATE_MLPROGRAM'] \n"); + } + } + // COREML_FLAG_CREATE_MLPROGRAM + Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CoreML(session_options, coreml_flags)); +#else + ORT_THROW("CoreML is not supported in this build\n"); +#endif #else - ORT_THROW("COREML is not supported in this build\n"); + ORT_THROW("COREML is not supported on this platform.\n"); #endif } else if (provider_name_ == onnxruntime::kDmlExecutionProvider) { #ifdef USE_DML diff --git a/onnxruntime/test/providers/coreml/coreml_basic_test.cc b/onnxruntime/test/providers/coreml/coreml_basic_test.cc index 7b6f1b9244be9..94817158017bd 100644 --- a/onnxruntime/test/providers/coreml/coreml_basic_test.cc +++ b/onnxruntime/test/providers/coreml/coreml_basic_test.cc @@ -192,5 +192,25 @@ TEST(CoreMLExecutionProviderTest, TestOrtFormatModel) { #endif } +// Test that we fix invalid names in model inputs, initializers and outputs. 
+TEST(CoreMLExecutionProviderTest, TestNameSanitization) {
+  OpTester test("Clip", 11);
+
+  std::vector<int64_t> dims{3, 3};
+  test.AddInput<float>("0", dims,
+                       {-1.0f, 0.0f, 1.0f,
+                        -6.0f, 0.0f, 6.0f,
+                        -5.4f, 2.0f, 6.0f});
+  test.AddInput<float>("1.min", {}, {-5}, true);  // add as initializers
+  test.AddInput<float>("2/max", {}, {5}, true);
+  test.AddOutput<float>("3", dims,
+                        {-1.0f, 0.0f, 1.0f,
+                         -5.0f, 0.0f, 5.0f,
+                         -5.0f, 2.0f, 5.0f});
+
+  // TensorRT does not support Clip opset 11 yet.
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
+}
 } // namespace test
 } // namespace onnxruntime
diff --git a/onnxruntime/test/providers/cpu/math/clip_test.cc b/onnxruntime/test/providers/cpu/math/clip_test.cc
index efb46e86d04e4..b5d5f84df950a 100644
--- a/onnxruntime/test/providers/cpu/math/clip_test.cc
+++ b/onnxruntime/test/providers/cpu/math/clip_test.cc
@@ -182,7 +182,7 @@ TEST(MathOpTest, Clip) {
   run_test(true);
 }
 
-// Use clip between [0, 6] as Relu6 (for some EPs, such as NNAPI)
+// Use clip between [0, 6] as Relu6 to test the optimized path in some EPs, such as NNAPI and CoreML
 TEST(MathOpTest, Clip_Relu6) {
   // To test NNAPI EP, we need the min/max to be in initializers
   auto run_test = [](bool min_max_are_initializer) {
@@ -208,6 +208,31 @@ TEST(MathOpTest, Clip_Relu6) {
   run_test(true);
 }
 
+// Use clip between [0, inf] as Relu to test the optimized path in some EPs, such as CoreML
+TEST(MathOpTest, Clip_Relu) {
+  // To test the NNAPI and CoreML EPs, we need the min value to be an initializer
+  auto run_test = [](bool min_max_are_initializer) {
+    OpTester test("Clip", 11);
+
+    std::vector<int64_t> dims{3, 3};
+    test.AddInput<float>("X", dims,
+                         {-1.0f, 0.0f, 1.0f,
+                          -6.0f, 3.5f, 6.0f,
+                          -5.4f, 2.0f, 8.0f});
+    test.AddInput<float>("min", {}, {0.0f}, min_max_are_initializer);
+    test.AddOutput<float>("Y", dims,
+                          {0.0f, 0.0f, 1.0f,
+                           0.0f, 3.5f, 6.0f,
+                           0.0f, 2.0f, 8.0f});
+
+    // TensorRT does not support Clip opset 11 yet.
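+    // (the last argument to Run() is the set of EPs to exclude from this test)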
+    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
+  };
+
+  run_test(false);
+  run_test(true);
+}
+
 // Use clip between [-1, 1] as Relu1 (for some EPs, such as NNAPI)
 TEST(MathOpTest, Clip_Relu1) {
   // To test NNAPI EP, we need the min/max to be in initializers
diff --git a/onnxruntime/test/providers/cpu/math/gemm_test.cc b/onnxruntime/test/providers/cpu/math/gemm_test.cc
index bf089e083d67e..428925e154497 100644
--- a/onnxruntime/test/providers/cpu/math/gemm_test.cc
+++ b/onnxruntime/test/providers/cpu/math/gemm_test.cc
@@ -281,24 +281,31 @@ using GemmOpTypedTestsTypes = ::testing::Types;
 TYPED_TEST_SUITE(GemmOpTypedTests, GemmOpTypedTestsTypes);
 
 TYPED_TEST(GemmOpTypedTests, TestGemmScalarBroadcast) {
-  OpTester test("Gemm");
+  auto run_test = [](bool b_is_initializer, bool c_is_initializer) {
+    OpTester test("Gemm");
 
-  test.AddAttribute("transA", (int64_t)0);
-  test.AddAttribute("transB", (int64_t)0);
-  test.AddAttribute("alpha", 1.0f);
-  test.AddAttribute("beta", 1.0f);
+    test.AddAttribute("transA", (int64_t)0);
+    test.AddAttribute("transB", (int64_t)0);
+    test.AddAttribute("alpha", 1.0f);
+    test.AddAttribute("beta", 1.0f);
 
-  test.AddInput<TypeParam>("A", {2, 4},
-                           {static_cast<TypeParam>(1.0f), static_cast<TypeParam>(2.0f), static_cast<TypeParam>(3.0f), static_cast<TypeParam>(4.0f),
-                            static_cast<TypeParam>(-1.0f), static_cast<TypeParam>(-2.0f), static_cast<TypeParam>(-3.0f), static_cast<TypeParam>(-4.0f)});
-  test.AddInput<TypeParam>("B", {4, 3}, std::vector<TypeParam>(12, static_cast<TypeParam>(1.0f)));
-  test.AddInput<TypeParam>("C", {1}, std::vector<TypeParam>{static_cast<TypeParam>(1.0f)});
-  test.AddOutput<TypeParam>("Y", {2, 3},
-                            {static_cast<TypeParam>(11.0f), static_cast<TypeParam>(11.0f), static_cast<TypeParam>(11.0f),
-                             static_cast<TypeParam>(-9.0f), static_cast<TypeParam>(-9.0f), static_cast<TypeParam>(-9.0f)});
-  test.Config(run_with_tunable_op)
-      .RunWithConfig();
+    test.AddInput<TypeParam>("A", {2, 4},
+                             {static_cast<TypeParam>(1.0f), static_cast<TypeParam>(2.0f), static_cast<TypeParam>(3.0f), static_cast<TypeParam>(4.0f),
+                              static_cast<TypeParam>(-1.0f), static_cast<TypeParam>(-2.0f), static_cast<TypeParam>(-3.0f), static_cast<TypeParam>(-4.0f)});
+    test.AddInput<TypeParam>("B", {4, 3}, std::vector<TypeParam>(12, static_cast<TypeParam>(1.0f)), b_is_initializer);
+    test.AddInput<TypeParam>("C", {1}, std::vector<TypeParam>{static_cast<TypeParam>(1.0f)}, c_is_initializer);
+    test.AddOutput<TypeParam>("Y", {2, 3},
+                              {static_cast<TypeParam>(11.0f), static_cast<TypeParam>(11.0f), static_cast<TypeParam>(11.0f),
+                               static_cast<TypeParam>(-9.0f), static_cast<TypeParam>(-9.0f), static_cast<TypeParam>(-9.0f)});
+    test.Config(run_with_tunable_op)
+        .RunWithConfig();
+  };
+
+  run_test(false, false);
+  // CoreML EP requires weight and bias to be initializers
+  run_test(true, true);
 }
+
 TYPED_TEST(GemmOpTypedTests, TestGemm2DBroadcast_2) {
   OpTester test("Gemm");
diff --git a/onnxruntime/test/providers/cpu/nn/batch_norm_op_test.cc b/onnxruntime/test/providers/cpu/nn/batch_norm_op_test.cc
index ee18cf2cea6cb..cbb4531a50b7c 100644
--- a/onnxruntime/test/providers/cpu/nn/batch_norm_op_test.cc
+++ b/onnxruntime/test/providers/cpu/nn/batch_norm_op_test.cc
@@ -75,6 +75,43 @@ TEST(BatchNormTest, PositiveTestCase) {
   input_data_map.insert({"mean", mean});
   input_data_map.insert({"var", var});
 
+  InputShapesMap input_shapes_map;
+  vector<int64_t> input_shape{1, 1, 7, 7};
+  input_shapes_map.insert({"X", input_shape});
+  input_shapes_map.insert({"scale", {1}});
+  input_shapes_map.insert({"B", {1}});
+  input_shapes_map.insert({"mean", {1}});
+  input_shapes_map.insert({"var", {1}});
+
+  auto expected_output = {1.01359f, 0.703983f, 0.641631f, 1.08571f, 0.939167f, 0.762469f, 0.682729f, 0.762401f, 0.787021f,
+                          1.06744f, 0.604378f, 0.957476f, 0.667302f, 0.901764f, 1.07566f, 1.01117f, 0.928324f, 0.897667f,
+                          0.705842f, 0.660885f, 0.977291f, 0.878918f, 0.818345f, 1.06608f, 0.839057f, 1.04796f, 0.621471f,
+                          0.781831f, 0.760527f, 0.835665f, 1.05825f, 0.611442f, 0.781873f, 1.08437f, 0.907454f, 0.926173f,
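+                          // (second half of the expected output for the {1, 1, 7, 7} case above)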
+                          1.03375f, 0.707961f, 0.968646f, 0.621757f, 0.973095f, 0.700301f, 0.916723f, 0.807602f, 0.692598f,
+                          0.621972f, 0.707334f, 0.63723f, 0.63062f};
+  float epsilon = 1e-05f;
+  TestBatchNorm(input_data_map, input_shapes_map, epsilon, expected_output, input_shape);
+}
+
+TEST(BatchNormTest, PositiveTestCase_5D) {
+  // This input was taken from the SpatialBN_1.pb, SpatialBN_1_input.pb and SpatialBN_1_output.pb files.
+  vector<float> X{0.329876f, -0.287158f, -0.411425f, 0.473621f, 0.18156f, -0.170596f, -0.329516f, -0.170733f, -0.121664f, 0.4372f,
+                  -0.485668f, 0.218049f, -0.360263f, 0.107016f, 0.45358f, 0.325056f, 0.15995f, 0.098852f, -0.283453f, -0.373051f,
+                  0.257542f, 0.0614853f, -0.0592363f, 0.434488f, -0.0179583f, 0.398374f, -0.451602f, -0.132009f, -0.174468f,
+                  -0.0247169f, 0.418897f, -0.47159f, -0.131925f, 0.470943f, 0.118357f, 0.155664f, 0.370062f, -0.279229f, 0.240311f,
+                  -0.451034f, 0.249178f, -0.294496f, 0.13683f, -0.0806475f, -0.309849f, -0.450604f, -0.28048f, -0.420197f, -0.433369f};
+  vector<float> scale{0.589433f};
+  vector<float> B{-0.384622f};
+  vector<float> mean{-2.45673f};
+  vector<float> var{1.37998f};
+
+  InputDataMap input_data_map;
+  input_data_map.insert({"X", X});
+  input_data_map.insert({"scale", scale});
+  input_data_map.insert({"B", B});
+  input_data_map.insert({"mean", mean});
+  input_data_map.insert({"var", var});
+
   InputShapesMap input_shapes_map;
   vector<int64_t> input_shape{1, 1, 7, 7, 1};
   input_shapes_map.insert({"X", input_shape});
diff --git a/onnxruntime/test/providers/cpu/tensor/resize_op_test.cc b/onnxruntime/test/providers/cpu/tensor/resize_op_test.cc
index 10f02349a24d5..e32d171e62681 100644
--- a/onnxruntime/test/providers/cpu/tensor/resize_op_test.cc
+++ b/onnxruntime/test/providers/cpu/tensor/resize_op_test.cc
@@ -566,8 +566,8 @@ TEST(ResizeOpTest, NhwcResizeOpLinearDownSampleTest_4DBilinear_pytorch_half_pixe
   test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kDmlExecutionProvider});
 }
 
-TEST(ResizeOpTest, ResizeOpLinearUpSampleTest_4DBilinear_asymmetric) {
-  // To test NNAPI EP, we need the sclaes/sizes to be in initializers
+TEST(ResizeOpTest, ResizeOpLinearUpSampleTest_4DBilinear_asymmetric_scales) {
+  // To test CoreML/NNAPI EP, we need the scales/sizes to be in initializers
   auto run_test = [](bool scales_in_initializer) {
     OpTester test("Resize", 13);
     std::vector<float> roi{};
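+    // (scales/sizes must be initializers because NNAPI/CoreML convert the model ahead of time,
+    //  so the values have to be known at model-conversion time rather than at inference time)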