CoreML: Aggregated changes to add all required ops for priority model (#21472)

### Description
These changes are combined into one PR to simplify check-in:
- Add Concat (#21423)
- Add DepthToSpace (#21426)
- Add LeakyRelu (#21453)
- Add test scripts (#21427)
- Add ability to set CoreML flags from Python (#21434); a C++ analogue is sketched below
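
For context, a minimal C++ sketch of the equivalent flag-setting path. This uses the pre-existing C API entry point rather than the new Python binding, and header names/locations may vary by package:

```cpp
#include <cstdint>

#include "onnxruntime_cxx_api.h"
#include "coreml_provider_factory.h"  // COREML_FLAG_* constants (location may vary)

int main() {
  Ort::SessionOptions session_options;

  // Request an ML Program model rather than a NeuralNetwork one.
  const uint32_t coreml_flags = COREML_FLAG_CREATE_MLPROGRAM;
  Ort::ThrowOnError(
      OrtSessionOptionsAppendExecutionProvider_CoreML(session_options, coreml_flags));

  return 0;
}
```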


Other changes:
- Updated partitioning utils to support dropping constant initializers from a ComputeCapability's inputs (sketched below).
  - Noticed that the list of inputs to the CoreML model was unexpectedly long because of this.
  - We copy constant initializers into the CoreML model, so the originals are no longer needed; if they remain as inputs, ORT can't free them because they appear to be in use.
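
A rough sketch of the partitioning-utils idea, with hypothetical names (`SubGraphDef` and the `is_constant_initializer` callback are illustrative stand-ins; the real change lives in ORT's partitioning utils):

```cpp
#include <algorithm>
#include <functional>
#include <string>
#include <vector>

// Hypothetical stand-in for a ComputeCapability's input list.
struct SubGraphDef {
  std::vector<std::string> inputs;
};

// Drop inputs that are constant initializers. In ORT the predicate would
// consult the Graph's initialized tensors; here it is an assumed callback.
void DropConstantInitializerInputs(
    SubGraphDef& sub_graph,
    const std::function<bool(const std::string&)>& is_constant_initializer) {
  auto& inputs = sub_graph.inputs;
  inputs.erase(std::remove_if(inputs.begin(), inputs.end(), is_constant_initializer),
               inputs.end());
}
```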

skottmckay authored Jul 25, 2024
1 parent 3cdf4b9 commit b0e1f7f
Showing 27 changed files with 783 additions and 137 deletions.
1 change: 1 addition & 0 deletions .lintrunner.toml
@@ -137,6 +137,7 @@ exclude_patterns = [
'onnxruntime/core/mickey/gemm/**', # CUTLASS based libs recommends NO automatic code formatting
'winml/lib/Api.Image/shaders/**', # Contains data chunks
'onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_launch_template.h', # Bool Switches hang Clang
'onnxruntime/core/providers/coreml/mlprogram_test_scripts/**', # test scripts only
]
command = [
'python',
24 changes: 15 additions & 9 deletions include/onnxruntime/core/graph/graph.h
@@ -1408,6 +1408,11 @@ class Graph {  // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
RuntimeOptimizationRecordContainer& MutableRuntimeOptimizations() {
return runtime_optimizations_;
}

// We don't run Graph::Resolve() on an ORT format model, but a compiling EP may copy initializers to its
// compiled model during partitioning, leaving them unused in the ORT Graph. To allow the memory to be freed
// we need to manually run the cleanup that would usually happen as part of Graph::Resolve.
Status RemovedUnusedInitializersOrtFormat();
#endif // !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)

// This friendship relationship should only be used to call Graph::Graph and
@@ -1541,12 +1546,6 @@ class Graph {  // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi

common::Status PerformTypeAndShapeInferencing(const ResolveOptions& options);

// Recursively find all subgraphs including nested subgraphs
void FindAllSubgraphs(std::vector<Graph*>& subgraphs);

// Iterate this Graph instance and all subgraphs, calling the provided function for each.
common::Status ForThisAndAllSubgraphs(const std::vector<Graph*>& subgraphs, std::function<Status(Graph&)> func);

common::Status InferAndVerifyTypeMatch(Node& node, const ONNX_NAMESPACE::OpSchema& op, const ResolveOptions& options);

// perform type and shape inferencing on the subgraph and Resolve to validate
@@ -1576,9 +1575,6 @@ class Graph {  // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
// Implementation for initializer replacement
Status ReplaceInitializedTensorImpl(ONNX_NAMESPACE::TensorProto new_initializer, bool is_external);

// Clear all unused initializers and NodeArgs
void CleanUnusedInitializersAndNodeArgs(const std::unordered_set<std::string>* initializer_names_to_preserve = nullptr);

std::vector<NodeArg*> CreateNodeArgs(const google::protobuf::RepeatedPtrField<std::string>& names,
const ArgNameToTypeMap& name_to_type_map);

@@ -1587,6 +1583,16 @@ class Graph {
#endif // !defined(ORT_MINIMAL_BUILD)

#if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)

// Recursively find all subgraphs including nested subgraphs
void FindAllSubgraphs(std::vector<Graph*>& subgraphs);

// Iterate this Graph instance and all subgraphs, calling the provided function for each.
common::Status ForThisAndAllSubgraphs(const std::vector<Graph*>& subgraphs, std::function<Status(Graph&)> func);

// Clear all unused initializers and NodeArgs
void CleanUnusedInitializersAndNodeArgs(const std::unordered_set<std::string>* initializer_names_to_preserve = nullptr);

Status PopulateNodeArgToProducerConsumerLookupsFromNodes();

template <typename TInstance>
60 changes: 37 additions & 23 deletions onnxruntime/core/graph/graph.cc
@@ -3254,27 +3254,6 @@ Status Graph::PerformTypeAndShapeInferencing(const ResolveOptions& options) {
return Status::OK();
}

void Graph::FindAllSubgraphs(std::vector<Graph*>& subgraphs) {
for (auto& node : Nodes()) {
for (auto& subgraph : node.MutableSubgraphs()) {
subgraphs.push_back(subgraph.get());
subgraph->FindAllSubgraphs(subgraphs);
}
}
}

Status Graph::ForThisAndAllSubgraphs(const std::vector<Graph*>& subgraphs, std::function<Status(Graph&)> func) {
auto status = func(*this);
ORT_RETURN_IF_ERROR(status);

for (auto& subgraph : subgraphs) {
status = func(*subgraph);
ORT_RETURN_IF_ERROR(status);
}

return status;
}

Status Graph::Resolve(const ResolveOptions& options) {
if (parent_graph_) {
// Resolve must start at the top level graph in-order to handle outer scope
@@ -3387,6 +3366,39 @@ void Graph::AddInitializedTensor(const TensorProto& tensor) {
ORT_IGNORE_RETURN_VALUE(GetOrCreateNodeArg(tensor.name(), &t));
}
}

void Graph::FindAllSubgraphs(std::vector<Graph*>& subgraphs) {
for (auto& node : Nodes()) {
for (auto& subgraph : node.MutableSubgraphs()) {
subgraphs.push_back(subgraph.get());
subgraph->FindAllSubgraphs(subgraphs);
}
}
}

Status Graph::ForThisAndAllSubgraphs(const std::vector<Graph*>& subgraphs, std::function<Status(Graph&)> func) {
auto status = func(*this);
ORT_RETURN_IF_ERROR(status);

for (auto& subgraph : subgraphs) {
status = func(*subgraph);
ORT_RETURN_IF_ERROR(status);
}

return status;
}

Status Graph::RemovedUnusedInitializersOrtFormat() {
std::vector<Graph*> all_subgraphs;
FindAllSubgraphs(all_subgraphs);
auto cleanup_func = [](Graph& graph) {
graph.CleanUnusedInitializersAndNodeArgs(nullptr);
return Status::OK();
};

auto result = ForThisAndAllSubgraphs(all_subgraphs, cleanup_func);
return result;
}
#endif // !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)

const std::string& Graph::Name() const noexcept {
@@ -4122,6 +4134,9 @@ void Graph::ToGraphProtoInternal(ONNX_NAMESPACE::GraphProto& graph_proto) const
}
}

#endif // !defined(ORT_MINIMAL_BUILD)

#if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)
void Graph::CleanUnusedInitializersAndNodeArgs(const std::unordered_set<std::string>* initializer_names_to_preserve) {
// Node Args being used
std::unordered_set<const NodeArg*> used_args;
@@ -4253,8 +4268,7 @@ void Graph::CleanUnusedInitializersAndNodeArgs(const std::unordered_set<std::str
}
}
}

#endif // !defined(ORT_MINIMAL_BUILD)
#endif // !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)

void Graph::ComputeOverridableInitializers() {
graph_overridable_initializers_.clear();
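A hedged sketch, not code from this commit, of how the new cleanup above might be invoked once a compiling EP (e.g. CoreML) has copied constant initializers into its compiled model, so ORT can free the now-unused originals:

```cpp
#include "core/common/common.h"  // ORT_RETURN_IF_ERROR
#include "core/graph/graph.h"

namespace onnxruntime {

common::Status CleanupAfterCompile(Graph& graph) {
  // Graph::Resolve() is not run for ORT format models, so trigger the
  // unused-initializer cleanup manually after partitioning.
  ORT_RETURN_IF_ERROR(graph.RemovedUnusedInitializersOrtFormat());
  return common::Status::OK();
}

}  // namespace onnxruntime
```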
@@ -83,19 +83,30 @@ Status ActivationOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
using namespace CoreML::Specification::MILSpec;
// https://apple.github.io/coremltools/source/coremltools.converters.mil.mil.ops.defs.html#module-coremltools.converters.mil.mil.ops.defs.iOS15.activation
std::string_view coreml_op_type;
bool add_alpha = false;
if (op_type == "Sigmoid") {
coreml_op_type = "sigmoid";
} else if (op_type == "Tanh") {
coreml_op_type = "tanh";
} else if (op_type == "Relu") {
coreml_op_type = "relu";
} else if (op_type == "LeakyRelu") {
coreml_op_type = "leaky_relu";
add_alpha = true;
} else {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"ActivationOpBuilder::AddToModelBuilderImpl, unknown op: ", op_type);
}

std::unique_ptr<Operation> op = model_builder.CreateOperation(node, coreml_op_type);
AddOperationInput(*op, "x", node.InputDefs()[0]->Name());

if (add_alpha) {
NodeAttrHelper helper(node);
const auto alpha = helper.Get("alpha", 0.01f);
AddOperationInput(*op, "alpha", model_builder.AddScalarConstant(op->type(), "alpha", alpha));
}

AddOperationOutput(*op, *node.OutputDefs()[0]);

model_builder.AddOperation(std::move(op));
@@ -198,7 +209,7 @@ bool ActivationOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInp

#if defined(COREML_ENABLE_MLPROGRAM)
if (input_params.create_mlprogram) {
if (op_type == "PRelu" || op_type == "LeakyRelu") {
if (op_type == "PRelu") { // TODO: ML Program supports this so should be easy to enable
return false;
}
} else
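For reference, the element-wise semantics being mapped above; the 0.01f default matches the ONNX `alpha` default used in the diff. This is a reference-only sketch, not the CoreML implementation:

```cpp
// Reference semantics of LeakyRelu; CoreML's leaky_relu receives the same
// alpha via the scalar constant added above.
float LeakyRelu(float x, float alpha = 0.01f) {
  return x >= 0.0f ? x : alpha * x;
}
```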
24 changes: 23 additions & 1 deletion onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc
@@ -309,11 +309,33 @@ COREML_SPEC::MILSpec::NamedValueType CreateNamedTensorValueType(const NodeArg& n

void AddOperationInput(MILSpec::Operation& op, std::string_view input_name, std::string_view value_name) {
MILSpec::Argument arg;
arg.mutable_arguments()->Add()->set_name(std::string(value_name));
arg.mutable_arguments()->Add()->set_name(value_name.data(), value_name.size());

(*op.mutable_inputs())[input_name] = std::move(arg);
}

void AddOperationVariadicInput(MILSpec::Operation& op, std::string_view input_name,
const std::vector<std::string_view>& value_names) {
MILSpec::Argument arg;
for (const auto& value : value_names) {
arg.mutable_arguments()->Add()->set_name(value.data(), value.size());
}

(*op.mutable_inputs())[input_name] = std::move(arg);
}

void AddIntermediateOperationOutput(COREML_SPEC::MILSpec::Operation& op, std::string_view output_name,
int32_t element_type, std::optional<gsl::span<const int64_t>> shape) {
auto& outputs = *op.mutable_outputs();
auto& output_arg = *outputs.Add();
output_arg.set_name(output_name.data(), output_name.size());

MILSpec::ValueType& value = *output_arg.mutable_type();
MILSpec::TensorType& tensor_type = *value.mutable_tensortype();

SetTensorTypeInfo(tensor_type, OnnxDataTypeToMILSpec(element_type), shape, /*convert_scalar*/ true);
}

void AddOperationOutput(COREML_SPEC::MILSpec::Operation& op, const NodeArg& output,
std::optional<int32_t> override_element_type) {
auto& outputs = *op.mutable_outputs();
20 changes: 20 additions & 0 deletions onnxruntime/core/providers/coreml/builders/impl/builder_utils.h
@@ -129,6 +129,26 @@ COREML_SPEC::MILSpec::NamedValueType CreateNamedTensorValueType(const NodeArg& n
void AddOperationInput(COREML_SPEC::MILSpec::Operation& op,
std::string_view input_name, std::string_view value_name);

/// <summary>
/// Add a variadic input argument to a MILSpec::Operation
/// </summary>
/// <param name="op">Operation to update.</param>
/// <param name="input name">The input name defined by the spec for the operation. </param>
/// <param name="value_names">The input value names.</param>
void AddOperationVariadicInput(COREML_SPEC::MILSpec::Operation& op, std::string_view input_name,
const std::vector<std::string_view>& value_names);

/// <summary>
/// Add an output to a MILSpec::Operation for an intermediate operation when the implementation is composed of
/// multiple MLProgram operations. In this case we don't have a NodeArg for the output.
/// </summary>
/// <param name="op">Operation to update.</param>
/// <param name="output_name">Name of the intermediate output. Create using ModelBuilder::GetUniqueName.</param>
/// <param name="element_type">onnx::TensorProto_DataType element type of the output.
/// int32_t as that is what TensorShapeProto uses to store the value.</param>
/// <param name="shape">Shape of the output if known.</param>
void AddIntermediateOperationOutput(COREML_SPEC::MILSpec::Operation& op, std::string_view output_name,
int32_t element_type, std::optional<gsl::span<const int64_t>> shape);

/// <summary>
/// Add an output to a MILSpec::Operation. Name, data type and shape are used from the NodeArg.
/// </summary>
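A hypothetical usage fragment for the intermediate-output helper, chaining two MIL ops inside an op builder's AddToModelBuilderImpl. The op names and the `GetUniqueName` call pattern are assumptions, not code from this PR:

```cpp
// First op produces an intermediate tensor that has no NodeArg, so it is
// named explicitly; the second op consumes it and emits the real output.
std::unique_ptr<Operation> op0 = model_builder.CreateOperation(node, "expand_dims");
AddOperationInput(*op0, "x", node.InputDefs()[0]->Name());

const std::string intermediate_name = model_builder.GetUniqueName(node, "expanded");
AddIntermediateOperationOutput(*op0, intermediate_name,
                               ONNX_NAMESPACE::TensorProto_DataType_FLOAT,
                               std::nullopt);  // shape not known here
model_builder.AddOperation(std::move(op0));

std::unique_ptr<Operation> op1 = model_builder.CreateOperation(node, "squeeze");
AddOperationInput(*op1, "x", intermediate_name);
AddOperationOutput(*op1, *node.OutputDefs()[0]);
model_builder.AddOperation(std::move(op1));
```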
@@ -4,6 +4,7 @@
#include "core/providers/common.h"
#include "core/providers/coreml/builders/helper.h"
#include "core/providers/coreml/builders/impl/base_op_builder.h"
#include "core/providers/coreml/builders/impl/builder_utils.h"
#include "core/providers/coreml/builders/model_builder.h"
#include "core/providers/coreml/builders/op_builder_factory.h"
#include "core/providers/coreml/shape_utils.h"
@@ -18,27 +19,51 @@ class ConcatOpBuilder : public BaseOpBuilder {

bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
const logging::Logger& logger) const override;

bool SupportsMLProgram() const override { return true; }
};

Status ConcatOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
const Node& node,
const logging::Logger& logger) const {
std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);

layer->mutable_concat()->set_sequenceconcat(false);

for (const auto* input : node.InputDefs()) {
LOGS(logger, VERBOSE) << "input name " << input->Name();
*layer->mutable_input()->Add() = input->Name();
#if defined(COREML_ENABLE_MLPROGRAM)
if (model_builder.CreateMLProgram()) {
using namespace CoreML::Specification::MILSpec; // NOLINT

NodeAttrHelper helper(node);
const auto axis = helper.GetInt64("axis"); // required
const auto interleave = false;

std::unique_ptr<Operation> op = model_builder.CreateOperation(node, "concat");
std::vector<std::string_view> input_names;
for (const auto* input : node.InputDefs()) {
input_names.emplace_back(input->Name());
}
AddOperationVariadicInput(*op, "values", input_names);
AddOperationInput(*op, "axis", model_builder.AddScalarConstant(op->type(), "axis", *axis));
AddOperationInput(*op, "interleave", model_builder.AddScalarConstant(op->type(), "interleave", interleave));
AddOperationOutput(*op, *node.OutputDefs()[0]);
model_builder.AddOperation(std::move(op));
} else // NOLINT
#endif // defined(COREML_ENABLE_MLPROGRAM)
{
std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);

layer->mutable_concat()->set_sequenceconcat(false);

for (const auto* input : node.InputDefs()) {
LOGS(logger, VERBOSE) << "input name " << input->Name();
*layer->mutable_input()->Add() = input->Name();
}

*layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();

model_builder.AddLayer(std::move(layer));
}

*layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();

model_builder.AddLayer(std::move(layer));
return Status::OK();
}

bool ConcatOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& /* input_params */,
bool ConcatOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
const logging::Logger& logger) const {
const auto& input_defs = node.InputDefs();
if (input_defs.size() < 2) {
@@ -50,23 +75,25 @@ bool ConcatOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputPa
if (!GetShape(*input_defs[0], input_shape, logger))
return false;

auto rank = input_shape.size();
if (rank != 4) {
// For some reason, the concat in CoreML running on 3d tensor will concat on wrong axis
// Instead of concat on axis 0, it will concat on axis 1
// Disable Concat support for 3d tensor for now
// TODO, add ExpandDims and Squeeze, 3d -ExpandDims-> 4d -> Concat -Squeeze-> 3d
LOGS(logger, VERBOSE) << "Concat only support 4d shape for now, input is "
<< rank << "d shape";
return false;
}

NodeAttrHelper helper(node);
auto axis = static_cast<size_t>(HandleNegativeAxis(helper.Get("axis", 1), rank));
if (rank != axis + 3) {
LOGS(logger, VERBOSE) << "Concat only support axis to be -3, actual axis: " << axis
<< ", actual rank: " << rank;
return false;
if (!input_params.create_mlprogram) {
auto rank = input_shape.size();
if (rank != 4) {
// For some reason, the concat in CoreML running on 3d tensor will concat on wrong axis
// Instead of concat on axis 0, it will concat on axis 1
// Disable Concat support for 3d tensor for now
// TODO: add ExpandDims and Squeeze, 3d -ExpandDims-> 4d -> Concat -Squeeze-> 3d
LOGS(logger, VERBOSE) << "Concat only support 4d shape for now, input is "
<< rank << "d shape";
return false;
}

NodeAttrHelper helper(node);
auto axis = static_cast<size_t>(HandleNegativeAxis(helper.Get("axis", 1), rank));
if (rank != axis + 3) {
LOGS(logger, VERBOSE) << "Concat only support axis to be -3, actual axis: " << axis
<< ", actual rank: " << rank;
return false;
}
}

return true;
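For reference, a worked check of the NeuralNetwork-path axis restriction kept above; the semantics of `HandleNegativeAxis` are assumed from its use here:

```cpp
// Assumed semantics of HandleNegativeAxis as used in the check above.
int64_t HandleNegativeAxisRef(int64_t axis, int64_t rank) {
  return axis < 0 ? axis + rank : axis;
}

// With rank == 4, the condition `rank != axis + 3` only passes for axis == 1,
// i.e. ONNX axis -3 (or equivalently 1). The new ML Program path above skips
// this restriction entirely.
```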