Add ML Program support for more operators (#19527)
### Description

Add support for:
- Clip/Relu/Relu6
- Add/Mul/Div/Sub/Pow
- GlobalAveragePool/GlobalMaxPool/AveragePool/MaxPool
- Reshape
- Gemm/MatMul

Fix some build issues/warnings arising from these changes.

Fix a couple of potential issues with the Resize op as well (noticed due to the change to reject inputs with empty data at a higher level).

### Motivation and Context
<!-- - Why is this change required? What problem does it solve?
- If it fixes an open issue, please link to the issue here. -->
Enable mobilenetv2 with ML Program
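
For context, a minimal sketch of how a client opts in to the ML Program format once these operators are supported. This assumes the C API entry point and flag from `onnxruntime/core/providers/coreml/coreml_provider_factory.h`; the model path is a placeholder, and this snippet is not part of the diff.

```cpp
// Sketch: register the CoreML EP and request an ML Program model rather than
// the older NeuralNetwork format.
#include "onnxruntime_cxx_api.h"
#include "coreml_provider_factory.h"  // COREML_FLAG_CREATE_MLPROGRAM

int main() {
  Ort::Env env;
  Ort::SessionOptions so;

  // COREML_FLAG_CREATE_MLPROGRAM selects the ML Program model format.
  Ort::ThrowOnError(
      OrtSessionOptionsAppendExecutionProvider_CoreML(so, COREML_FLAG_CREATE_MLPROGRAM));

  // "mobilenetv2.onnx" is an illustrative path.
  Ort::Session session(env, "mobilenetv2.onnx", so);
  return 0;
}
```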
skottmckay authored Mar 1, 2024
1 parent 5ee62a6 commit 2a857d9
Showing 33 changed files with 1,344 additions and 595 deletions.
2 changes: 1 addition & 1 deletion cmake/onnxruntime_providers_coreml.cmake
@@ -111,7 +111,7 @@ if(_enable_ML_PROGRAM)
file(GLOB
onnxruntime_providers_coreml_modelpackage_cc_srcs CONFIGURE_DEPENDS
"${coremltools_SOURCE_DIR}/modelpackage/src/ModelPackage.?pp"
"${coremltools_SOURCE_DIR}/modelpackage/src/Utils/JsonMap.?pp"
"${coremltools_SOURCE_DIR}/modelpackage/src/utils/JsonMap.?pp"
)

set(coremltools_srcs
7 changes: 6 additions & 1 deletion onnxruntime/core/providers/coreml/builders/coreml_spec.h
@@ -17,14 +17,19 @@
#ifdef HAS_SHORTEN_64_TO_32
#pragma GCC diagnostic ignored "-Wshorten-64-to-32"
#endif
#elif defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable : 4244) // conversion from long to int
#endif

// Model.pb.h is generated in the build output directory from the CoreML protobuf files in
// onnxruntime/core/providers/coreml/coremltools/mlmodel/format
// <build output directory>/_deps/coremltools-src/mlmodel/format
#include "coreml_proto/Model.pb.h"

#if defined(__GNUC__)
#pragma GCC diagnostic pop
#elif defined(_MSC_VER)
#pragma warning(pop)
#endif

namespace COREML_SPEC = CoreML::Specification;
14 changes: 10 additions & 4 deletions onnxruntime/core/providers/coreml/builders/helper.cc
@@ -85,9 +85,15 @@ bool IsInputSupported(const Node& node, const NodeArg& input,
}

if (dim == 0) {
LOGS(logger, WARNING) << "CoreML does not support shapes with dimension values of 0. Input:" << input_name
<< ", shape: " << Shape2String(shape);
return false;
if (node.OpType() == "Resize" && &input == node.InputDefs()[1]) {
// One special case: the Resize 'roi' input was originally a required input but is rarely used.
// ROI is not supported in the CoreML implementation, so we ignore the value, but it is often added
// (at least in the unit tests) as an initializer with shape {0}.
} else {
LOGS(logger, WARNING) << "CoreML does not support shapes with dimension values of 0. Input:" << input_name
<< ", shape: " << Shape2String(shape);
return false;
}
}
}

@@ -125,7 +131,7 @@ std::unordered_set<const Node*> GetSupportedNodes(const GraphViewer& graph_viewer,

bool CheckIsConstantInitializer(const NodeArg& node_arg, const GraphViewer& graph_viewer,
const logging::Logger& logger, std::string_view input_description) {
if (graph_viewer.GetConstantInitializer(node_arg.Name(), true) == nullptr) {
if (graph_viewer.GetConstantInitializer(node_arg.Name()) == nullptr) {
LOGS(logger, VERBOSE) << input_description << " (NodeArg name: '" << node_arg.Name()
<< "') is not a constant initializer tensor";
return false;
onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc
@@ -83,9 +83,14 @@ bool BaseOpBuilder::HasSupportedInputs(const Node& node, const OpBuilderInputParams& input_params,
}

/* static */
bool BaseOpBuilder::IsInput0Supported(const Node& node, const OpBuilderInputParams& /*input_params*/,
const logging::Logger& logger) {
const auto& input = *node.InputDefs()[0];
bool BaseOpBuilder::IsInputFloat(const Node& node, size_t idx, const OpBuilderInputParams& /*input_params*/,
const logging::Logger& logger) {
if (idx >= node.InputDefs().size()) {
LOGS(logger, VERBOSE) << "Input index [" << idx << "] is out of range";
return false;
}

const auto& input = *node.InputDefs()[idx];

int32_t input_type = ONNX_NAMESPACE::TensorProto_DataType_UNDEFINED;

@@ -102,7 +107,7 @@ bool BaseOpBuilder::HasSupportedInputsImpl(const Node& node, const OpBuilderInputParams& input_params,
const logging::Logger& logger) const {
// We only check the type of input 0 by default
// specific op builder can override this
return IsInput0Supported(node, input_params, logger);
return IsInputFloat(node, 0, input_params, logger);
}

bool BaseOpBuilder::HasSupportedOpSet(const Node& node, const logging::Logger& logger) const {
onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h
@@ -28,9 +28,9 @@ class BaseOpBuilder : public IOpBuilder {
void AddInitializersToSkip(ModelBuilder& /*model_builder*/, const Node& /*node*/) const override {}

protected:
// check if the first input's data type is supported.
static bool IsInput0Supported(const Node& node, const OpBuilderInputParams& input_params,
const logging::Logger& logger);
// check the data type of the input at 'idx'. Currently we only support float.
static bool IsInputFloat(const Node& node, size_t idx, const OpBuilderInputParams& input_params,
const logging::Logger& logger);

private:
virtual bool IsOpSupportedImpl(const Node& /*node*/, const OpBuilderInputParams& /*input_params*/,
113 changes: 68 additions & 45 deletions onnxruntime/core/providers/coreml/builders/impl/binary_op_builder.cc
@@ -5,6 +5,7 @@
#include "core/providers/common.h"
#include "core/providers/coreml/builders/helper.h"
#include "core/providers/coreml/builders/impl/base_op_builder.h"
#include "core/providers/coreml/builders/impl/builder_utils.h"
#include "core/providers/coreml/builders/model_builder.h"
#include "core/providers/coreml/builders/op_builder_factory.h"
#include "core/providers/shared/utils/utils.h"
@@ -19,6 +20,8 @@ class BinaryOpBuilder : public BaseOpBuilder {

bool HasSupportedInputsImpl(const Node& node, const OpBuilderInputParams& input_params,
const logging::Logger& logger) const override;

bool SupportsMLProgram() const override { return true; }
};

namespace {
@@ -57,38 +60,72 @@ Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
const auto& op_type(node.OpType());
const auto& input_defs(node.InputDefs());

std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);

if (op_type == "Add") {
// original mutable_add() has limited broadcasting support
// updated to use CoreML::AddBroadcastableLayerParams which has more general broadcasting support
if (CheckIfBothInputShapesMatch(node, logger)) {
layer->mutable_add();
#if defined(COREML_ENABLE_MLPROGRAM)
if (model_builder.CreateMLProgram()) {
using namespace CoreML::Specification::MILSpec;

// https://apple.github.io/coremltools/source/coremltools.converters.mil.mil.ops.defs.html#module-coremltools.converters.mil.mil.ops.defs.iOS15.elementwise_binary
std::string_view coreml_op_type;
if (op_type == "Add") {
coreml_op_type = "add";
} else if (op_type == "Mul") {
coreml_op_type = "mul";
} else if (op_type == "Sub") {
coreml_op_type = "sub";
} else if (op_type == "Div") {
// we only support fp32 currently. when we add support for integers we need to check the type and use
// "floor_div" or "real_div" accordingly
coreml_op_type = "real_div";
} else if (op_type == "Pow") {
coreml_op_type = "pow";
} else {
layer->mutable_addbroadcastable();
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"BinaryOpBuilder::AddToModelBuilderImpl, unexpected op: ", op_type);
}
} else if (op_type == "Mul") {
if (CheckIfBothInputShapesMatch(node, logger)) {
layer->mutable_multiply();

std::unique_ptr<Operation> op = model_builder.CreateOperation(node, coreml_op_type);
AddOperationInput(*op, "x", input_defs[0]->Name());
AddOperationInput(*op, "y", input_defs[1]->Name());
AddOperationOutput(*op, *node.OutputDefs()[0]);

model_builder.AddOperation(std::move(op));
} else
#endif // defined (COREML_ENABLE_MLPROGRAM)
{
std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);

if (op_type == "Add") {
// original mutable_add() has limited broadcasting support
// updated to use CoreML::AddBroadcastableLayerParams which has more general broadcasting support
if (CheckIfBothInputShapesMatch(node, logger)) {
layer->mutable_add();
} else {
layer->mutable_addbroadcastable();
}
} else if (op_type == "Mul") {
if (CheckIfBothInputShapesMatch(node, logger)) {
layer->mutable_multiply();
} else {
layer->mutable_multiplybroadcastable();
}
} else if (op_type == "Sub") {
layer->mutable_subtractbroadcastable();
} else if (op_type == "Div") {
layer->mutable_dividebroadcastable();
} else if (op_type == "Pow") {
layer->mutable_powbroadcastable();
} else {
layer->mutable_multiplybroadcastable();
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"BinaryOpBuilder::AddToModelBuilderImpl, unexpected op: ", op_type);
}
} else if (op_type == "Sub") {
layer->mutable_subtractbroadcastable();
} else if (op_type == "Div") {
layer->mutable_dividebroadcastable();
} else if (op_type == "Pow") {
layer->mutable_powbroadcastable();
} else {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"BinaryOpBuilder::AddToModelBuilderImpl, unknown op: ", op_type);
}

*layer->mutable_input()->Add() = input_defs[0]->Name();
*layer->mutable_input()->Add() = input_defs[1]->Name();
*layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();
*layer->mutable_input()->Add() = input_defs[0]->Name();
*layer->mutable_input()->Add() = input_defs[1]->Name();
*layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();

model_builder.AddLayer(std::move(layer));
}

model_builder.AddLayer(std::move(layer));
return Status::OK();
}

@@ -99,25 +136,11 @@ int BinaryOpBuilder::GetMinSupportedOpSet(const Node& /* node */) const {

bool BinaryOpBuilder::HasSupportedInputsImpl(const Node& node, const OpBuilderInputParams& input_params,
const logging::Logger& logger) const {
if (node.OpType() != "Pow") {
return IsInput0Supported(node, input_params, logger);
}

const auto& input_1 = *node.InputDefs()[0];
const auto& input_2 = *node.InputDefs()[1];

// Pow we only support both inputs as fp32 for now
int32_t input_type_1;
int32_t input_type_2;
if (!GetType(input_1, input_type_1, logger) ||
!GetType(input_2, input_type_2, logger)) {
return false;
}

if (input_type_1 != ONNX_NAMESPACE::TensorProto_DataType_FLOAT || input_type_1 != input_type_2) {
LOGS(logger, VERBOSE) << "Pow only supports fp32 inputs, actual input type"
<< ", Input type 1: " << input_type_1
<< ", Input type 2: " << input_type_2;
// Add/Sub/Mul/Div spec says inputs must be of the same type.
// Pow spec says inputs can be different types.
// We only support float for all of these inputs.
if (!IsInputFloat(node, 0, input_params, logger) ||
((node.OpType() == "Pow") && !IsInputFloat(node, 1, input_params, logger))) {
return false;
}

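
A side note on the Div mapping in this file: the builder emits MIL `real_div` because only fp32 inputs are currently supported, and the in-code comment anticipates that integer support would have to choose between `floor_div` and `real_div`. A hypothetical sketch of that dispatch, written only to illustrate the comment (not code from this commit):

```cpp
#include <cstdint>
#include <string_view>

// Hypothetical: pick the MIL elementwise op for ONNX Div based on input type.
// ONNX_NAMESPACE::TensorProto_DataType_* values as used elsewhere in this file.
std::string_view DivCoreMLOpType(int32_t input_type) {
  if (input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
    return "real_div";  // floating-point division
  }
  // Integer inputs would map to "floor_div"; rounding behavior for negative
  // operands would need to be checked against the ONNX Div spec first.
  return "floor_div";
}
```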
68 changes: 68 additions & 0 deletions onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc
@@ -7,6 +7,7 @@
#include "core/framework/tensorprotoutils.h"
#include "core/providers/coreml/builders/coreml_spec.h"
#include "core/providers/coreml/builders/helper.h"
#include "core/providers/coreml/builders/model_builder.h"
#include "core/providers/shared/utils/utils.h"
#include "core/optimizer/initializer.h"

@@ -132,6 +133,7 @@ void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<c
CreateCoreMLWeightConvertingDataToFloats(weight, data);
}

#if defined(COREML_ENABLE_MLPROGRAM)
//
// ML Program Utils
//
@@ -309,5 +311,71 @@ void AddOperationOutput(COREML_SPEC::MILSpec::Operation& op, const NodeArg& output) {
output.Shape());
}

void AddPadTypeAndPads(COREML_SPEC::MILSpec::Operation& op, ModelBuilder& model_builder, std::string_view op_type,
const NodeAttrHelper& helper, int num_spatial_dims) {
AutoPadType auto_pad_type = StringToAutoPadType(helper.Get("auto_pad", "NOTSET"));

// pad type (string)
// valid - no pads (ONNX auto_pad VALID)
// custom - pads input (ONNX NOTSET)
// same - inferred to be `d_out[i] = ceil(d_in[i] / strides[i])` (assuming == ONNX SAME_UPPER)
// same_lower - as per same but any extra rows/cols are added at top/left if padding is odd (ONNX SAME_LOWER)
//
// TODO: See if we want to update HandleAutoPad to support 1D (and 3D) so we can infer if an autopad value
// can be used. TBD if that provides any performance benefit with ML Program though as CoreML could
// potentially do that same optimization internally.
switch (auto_pad_type) {
case AutoPadType::NOTSET: {
// use `pads` attribute.
auto onnx_pads = helper.GetInt64s("pads"); // 'pads' are used if auto_pad is NOTSET
if (onnx_pads) {
AddOperationInput(op, "pad_type",
model_builder.AddScalarConstant(op_type, "pad_type", std::string("custom")));

// need to re-order from x1_start, x2_start..., x1_end, x2_end... to
// x1_start, x1_end, x2_start, x2_end,...
size_t num_pads = onnx_pads->size();
size_t num_dims = num_pads / 2;
std::vector<int64_t> reordered_pads(num_pads, 0);
for (size_t i = 0; i < num_pads; ++i) {
auto cur_dim = i % num_dims;
if (i < num_dims) { // start values
reordered_pads[cur_dim * 2] = (*onnx_pads)[i];
} else { // end values
reordered_pads[cur_dim * 2 + 1] = (*onnx_pads)[i];
}
}

AddOperationInput(op, "pad", model_builder.AddConstant(op_type, "pad", reordered_pads));

break;
}

// fall through if explicit pads were not provided as the default value for `pads` is all zeros,
// which is the same as 'valid' padding.
[[fallthrough]];
}
case AutoPadType::VALID:
AddOperationInput(op, "pad_type",
model_builder.AddScalarConstant(op_type, "pad_type", std::string("valid")));

break;
case AutoPadType::SAME_UPPER:
case AutoPadType::SAME_LOWER: {
const auto pad_type = (auto_pad_type == AutoPadType::SAME_UPPER ? "same" : "same_lower");
AddOperationInput(op, "pad_type",
model_builder.AddScalarConstant(op_type, "pad_type", std::string(pad_type)));

// despite what the spec says, a 'pad' input seems to be required.
// https://github.com/apple/coremltools/issues/2127
// Provide the default value as that's what coremltools does for conv/avg_pool/max_pool.
std::vector<int64_t> ignored_pads(num_spatial_dims * 2, 0);
AddOperationInput(op, "pad", model_builder.AddConstant(op_type, "pad", ignored_pads));

break;
}
}
}
#endif // defined(COREML_ENABLE_MLPROGRAM)
} // namespace coreml
} // namespace onnxruntime
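
To make the `pads` reordering in `AddPadTypeAndPads` concrete, here is a standalone sketch of the same loop with a worked 2D example (the `ReorderPads` helper is illustrative, not part of the commit):

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// Standalone version of the reorder above: ONNX orders pads as
// [x1_begin, x2_begin, ..., x1_end, x2_end, ...]; CoreML expects
// [x1_begin, x1_end, x2_begin, x2_end, ...].
std::vector<int64_t> ReorderPads(const std::vector<int64_t>& onnx_pads) {
  const size_t num_dims = onnx_pads.size() / 2;
  std::vector<int64_t> reordered(onnx_pads.size(), 0);
  for (size_t i = 0; i < onnx_pads.size(); ++i) {
    const size_t cur_dim = i % num_dims;
    if (i < num_dims) {
      reordered[cur_dim * 2] = onnx_pads[i];  // begin values
    } else {
      reordered[cur_dim * 2 + 1] = onnx_pads[i];  // end values
    }
  }
  return reordered;
}

int main() {
  // 2D pooling example: ONNX pads {top, left, bottom, right} = {1, 2, 3, 4}
  for (int64_t v : ReorderPads({1, 2, 3, 4})) std::cout << v << ' ';
  std::cout << '\n';  // prints: 1 3 2 4
}
```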
17 changes: 16 additions & 1 deletion onnxruntime/core/providers/coreml/builders/impl/builder_utils.h
@@ -11,13 +11,15 @@
#include "core/common/status.h"
#include "core/graph/basic_types.h"
#include "core/providers/common.h"

#include "core/providers/coreml/builders/coreml_spec.h"
#include "core/providers/shared/utils/utils.h"

namespace onnxruntime {
class NodeArg;

namespace coreml {
class ModelBuilder;

// Try to see if we can map explicit padding to auto padding for Conv/Pool,
// since auto padding is usually more efficient
Status HandleAutoPad(const std::vector<int64_t> input_shape,
@@ -45,6 +47,7 @@ void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<c
// Copy the int64_t array to a coreml weight
void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const int64_t> data);

#if defined(COREML_ENABLE_MLPROGRAM)
//
// MLProgram utils
//
@@ -130,5 +133,17 @@ void AddOperationInput(COREML_SPEC::MILSpec::Operation& op,
/// <param name="op">Operation to update.</param>
/// <param name="output">NodeArg with details of output to add.</param>
void AddOperationOutput(COREML_SPEC::MILSpec::Operation& op, const NodeArg& output);

/// <summary>
/// Add pad_type and pad values.
/// </summary>
/// <param name="op">Operator to update</param>
/// <param name="model_builder">ModelBuilder to add constants with.</param>
/// <param name="op_type">Operator type.</param>
/// <param name="helper">Node attribute helper.</param>
/// <param name="num_spatial_dims">Number of spatial dims in input. Generally rank - 2 (ignore N and C dims).</param>
void AddPadTypeAndPads(COREML_SPEC::MILSpec::Operation& op, ModelBuilder& model_builder, std::string_view op_type,
const NodeAttrHelper& helper, int num_spatial_dims);
#endif // defined(COREML_ENABLE_MLPROGRAM)
} // namespace coreml
} // namespace onnxruntime
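
A hypothetical call site for `AddPadTypeAndPads`, following the same pattern the binary op builder uses above. This is a fragment, not code from this commit: `model_builder`, `node`, and the `"max_pool"` op type are assumed to exist as in the other ML Program builders in this change.

```cpp
// Hypothetical fragment from a 2D pooling builder.
NodeAttrHelper helper(node);
std::unique_ptr<COREML_SPEC::MILSpec::Operation> op =
    model_builder.CreateOperation(node, "max_pool");

AddOperationInput(*op, "x", node.InputDefs()[0]->Name());
// NCHW input: rank 4, minus the N and C dims, gives 2 spatial dims.
AddPadTypeAndPads(*op, model_builder, "max_pool", helper, /*num_spatial_dims*/ 2);
AddOperationOutput(*op, *node.OutputDefs()[0]);

model_builder.AddOperation(std::move(op));
```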