From 0f1f3b7705ddc2fe4f371f78a8a8b6a0428a68de Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Tue, 23 Jul 2024 20:21:55 +1000 Subject: [PATCH] CoreML: ML Program Slice (#21433) ### Description Add support for Slice ### Motivation and Context High priority models. --- .../coreml/builders/impl/builder_utils.cc | 10 +- .../coreml/builders/impl/builder_utils.h | 7 +- .../coreml/builders/impl/slice_op_builder.cc | 130 +++++++++++++----- .../coreml/builders/model_builder.cc | 7 + .../providers/coreml/builders/model_builder.h | 7 + .../providers/cpu/tensor/slice_op.test.cc | 2 +- .../apple/coreml_supported_mlprogram_ops.md | 1 + 7 files changed, 127 insertions(+), 37 deletions(-) diff --git a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc index 2fcf9a1d7d9ba..ebb3f97895f06 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc @@ -164,6 +164,7 @@ void SetTensorTypeInfo(MILSpec::TensorType& tensor_type, MILSpec::DataType data_ void SetTensorTypeInfo(MILSpec::TensorType& tensor_type, MILSpec::DataType data_type, const ONNX_NAMESPACE::TensorShapeProto* shape, bool convert_scalar = false) { tensor_type.set_datatype(data_type); + if (shape) { auto rank = shape->dim_size(); if (convert_scalar && rank == 0) { @@ -313,7 +314,8 @@ void AddOperationInput(MILSpec::Operation& op, std::string_view input_name, std: (*op.mutable_inputs())[input_name] = std::move(arg); } -void AddOperationOutput(COREML_SPEC::MILSpec::Operation& op, const NodeArg& output) { +void AddOperationOutput(COREML_SPEC::MILSpec::Operation& op, const NodeArg& output, + std::optional override_element_type) { auto& outputs = *op.mutable_outputs(); auto& output_arg = *outputs.Add(); output_arg.set_name(output.Name()); @@ -321,8 +323,10 @@ void AddOperationOutput(COREML_SPEC::MILSpec::Operation& op, const NodeArg& outp MILSpec::ValueType& 
value = *output_arg.mutable_type(); MILSpec::TensorType& tensor_type = *value.mutable_tensortype(); - SetTensorTypeInfo(tensor_type, OnnxDataTypeToMILSpec(output.TypeAsProto()->tensor_type().elem_type()), - output.Shape(), /*convert_scalar*/ true); + auto elem_type = override_element_type ? *override_element_type + : output.TypeAsProto()->tensor_type().elem_type(); + + SetTensorTypeInfo(tensor_type, OnnxDataTypeToMILSpec(elem_type), output.Shape(), /*convert_scalar*/ true); } void AddPadTypeAndPads(COREML_SPEC::MILSpec::Operation& op, ModelBuilder& model_builder, std::string_view op_type, diff --git a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h index 97fb83b6dc482..f012e6af0d718 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h +++ b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h @@ -134,7 +134,12 @@ void AddOperationInput(COREML_SPEC::MILSpec::Operation& op, /// /// Operation to update. /// NodeArg with details of output to add. -void AddOperationOutput(COREML_SPEC::MILSpec::Operation& op, const NodeArg& output); +/// +/// Override the element type. Only set to handle cases where we believe the data at runtime will be int32 but +/// the original ONNX node has type int64. +/// +void AddOperationOutput(COREML_SPEC::MILSpec::Operation& op, const NodeArg& output, + std::optional override_element_type = std::nullopt); /// /// Add pad_type and pad values. 
diff --git a/onnxruntime/core/providers/coreml/builders/impl/slice_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/slice_op_builder.cc index 39bfbfe5bba1f..51fc3f2c11c73 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/slice_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/slice_op_builder.cc @@ -4,6 +4,7 @@ #include "core/optimizer/initializer.h" #include "core/providers/coreml/builders/helper.h" #include "core/providers/coreml/builders/impl/base_op_builder.h" +#include "core/providers/coreml/builders/impl/builder_utils.h" #include "core/providers/coreml/builders/model_builder.h" #include "core/providers/coreml/builders/op_builder_factory.h" #include "core/providers/coreml/shape_utils.h" @@ -28,12 +29,14 @@ class SliceOpBuilder : public BaseOpBuilder { bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& builder_params, const logging::Logger& logger) const override; + + bool SupportsMLProgram() const override { return true; } }; namespace { -Status PrepareSliceComputeMetadataFromConstantInitializers(const Node& slice_node, - const GraphViewer& graph_viewer, - SliceOp::PrepareForComputeMetadata& compute_metadata) { +Status PrepareSliceComputeMetadata(const Node& slice_node, + const GraphViewer& graph_viewer, + SliceOp::PrepareForComputeMetadata& compute_metadata) { // TODO largely copied from nnapi::SliceOpBuilder::AddToModelBuilderImpl. put it somewhere where it can be reused? 
const auto input_defs = slice_node.InputDefs(); @@ -114,55 +117,113 @@ void SliceOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const No Status SliceOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const { + const auto& input_defs = node.InputDefs(); + const auto& output_defs = node.OutputDefs(); + std::vector data_shape; ORT_RETURN_IF_NOT(GetStaticShape(*node.InputDefs()[0], data_shape, logger), "Failed to get input shape."); + auto rank = data_shape.size(); SliceOp::PrepareForComputeMetadata compute_metadata{data_shape}; - ORT_RETURN_IF_ERROR(PrepareSliceComputeMetadataFromConstantInitializers(node, model_builder.GetGraphViewer(), - compute_metadata)); + ORT_RETURN_IF_ERROR(PrepareSliceComputeMetadata(node, model_builder.GetGraphViewer(), compute_metadata)); + +#if defined(COREML_ENABLE_MLPROGRAM) + if (model_builder.CreateMLProgram()) { + using namespace CoreML::Specification::MILSpec; // NOLINT + // https://apple.github.io/coremltools/source/coremltools.converters.mil.mil.ops.defs.html#coremltools.converters.mil.mil.ops.defs.iOS15.tensor_transformation.slice_by_index + + const InlinedVector begin_mask_values(rank, false); + InlinedVector end_mask_values(rank, false); + + // Special case - stepping backwards up to and including the first index in the dimension. + // In ONNX Slice, we use end <= -(rank + 1) to represent this. In CoreML, setting endids like that doesn't work, + // so use endmasks to specify the rest of the dimension instead. + for (size_t i = 0; i < rank; ++i) { + if (compute_metadata.steps_[i] < 0 && compute_metadata.ends_[i] == -1) { + end_mask_values[i] = true; + } + } - auto layer = model_builder.CreateNNLayer(node); - *layer->mutable_input()->Add() = node.InputDefs()[0]->Name(); - *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name(); - auto* slice_static = layer->mutable_slicestatic(); + // Only int32 and float are supported by CoreML slice_by_index. 
+ // We convert any int64 model input to int32 when running the CoreML model for the partition. + // Any other integer data created at runtime is the output from CoreML operations, and should int32 not int64. + // Based on that, we assume that the actual input when running will be int32, so we override the output data + // type to reflect this. + // If we were to leave it as TensorProto_DataType_INT64 the CoreML model would be invalid. + std::optional output_datatype; - for (size_t i = 0; i < compute_metadata.starts_.size(); ++i) { - const auto step = compute_metadata.steps_[i], - start = compute_metadata.starts_[i], - end = compute_metadata.ends_[i]; + int32_t input_type; + ORT_RETURN_IF_NOT(GetType(*node.InputDefs()[0], input_type, logger), "Failed to get input type"); - slice_static->add_beginids(start); - slice_static->add_beginmasks(false); + if (input_type == ONNX_NAMESPACE::TensorProto_DataType_INT64) { + output_datatype = ONNX_NAMESPACE::TensorProto_DataType_INT32; + } - if (step < 0 && end == -1) { - // Special case - stepping backwards up to and including the first index in the dimension. - // In ONNX Slice, we use end <= -(rank + 1) to represent this. In CoreML, setting endids like that doesn't work, - // so use endmasks to specify the rest of the dimension instead. 
- slice_static->add_endids(-1); // ignored - slice_static->add_endmasks(true); - } else { - slice_static->add_endids(end); - slice_static->add_endmasks(false); + auto op = model_builder.CreateOperation(node, "slice_by_index"); + + auto begin = model_builder.AddConstant(op->type(), "begin", AsSpan(compute_metadata.starts_)); + auto end = model_builder.AddConstant(op->type(), "end", AsSpan(compute_metadata.ends_)); + auto stride = model_builder.AddConstant(op->type(), "stride", AsSpan(compute_metadata.steps_)); + auto begin_mask = model_builder.AddConstant(op->type(), "begin_mask", AsSpan(begin_mask_values)); + auto end_mask = model_builder.AddConstant(op->type(), "end_mask", AsSpan(end_mask_values)); + + AddOperationInput(*op, "x", input_defs[0]->Name()); + AddOperationInput(*op, "begin", begin); + AddOperationInput(*op, "end", end); + AddOperationInput(*op, "stride", stride); + AddOperationInput(*op, "begin_mask", begin_mask); + AddOperationInput(*op, "end_mask", end_mask); + + AddOperationOutput(*op, *output_defs[0], output_datatype); + + model_builder.AddOperation(std::move(op)); + + } else // NOLINT +#endif // defined(COREML_ENABLE_MLPROGRAM) + { + auto layer = model_builder.CreateNNLayer(node); + *layer->mutable_input()->Add() = input_defs[0]->Name(); + *layer->mutable_output()->Add() = output_defs[0]->Name(); + auto* slice_static = layer->mutable_slicestatic(); + + for (size_t i = 0; i < rank; ++i) { + const auto step = compute_metadata.steps_[i], + start = compute_metadata.starts_[i], + end = compute_metadata.ends_[i]; + + slice_static->add_beginids(start); + slice_static->add_beginmasks(false); + + if (step < 0 && end == -1) { + // Special case - stepping backwards up to and including the first index in the dimension. + // In ONNX Slice, we use end <= -(rank + 1) to represent this. In CoreML, setting endids like that doesn't work, + // so use endmasks to specify the rest of the dimension instead. 
+ slice_static->add_endids(-1); // ignored + slice_static->add_endmasks(true); + } else { + slice_static->add_endids(end); + slice_static->add_endmasks(false); + } + + slice_static->add_strides(step); } - slice_static->add_strides(step); + model_builder.AddLayer(std::move(layer)); } - model_builder.AddLayer(std::move(layer)); return Status::OK(); } bool SliceOpBuilder::HasSupportedInputsImpl(const Node& node, const OpBuilderInputParams& /*input_params*/, const logging::Logger& logger) const { int32_t input_type; - if (!GetType(*node.InputDefs()[0], input_type, logger)) + if (!GetType(*node.InputDefs()[0], input_type, logger)) { return false; + } if (input_type != ONNX_NAMESPACE::TensorProto_DataType_FLOAT && input_type != ONNX_NAMESPACE::TensorProto_DataType_INT64) { - LOGS(logger, VERBOSE) << "[" << node.OpType() - << "] Input type: [" << input_type - << "] is not supported for now"; + LOGS(logger, VERBOSE) << "[" << node.OpType() << "] Input type: [" << input_type << "] is not supported"; return false; } @@ -197,9 +258,14 @@ bool SliceOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputPar } SliceOp::PrepareForComputeMetadata compute_metadata{data_shape}; - ORT_THROW_IF_ERROR(PrepareSliceComputeMetadataFromConstantInitializers(node, builder_params.graph_viewer, - compute_metadata)); + auto status = PrepareSliceComputeMetadata(node, builder_params.graph_viewer, compute_metadata); + if (status != Status::OK()) { + LOGS(logger, VERBOSE) << "PrepareSliceComputeMetadata failed:" << status.ErrorMessage(); + return false; + } + if (!ValidateSliceComputeMetadataForCoreML(compute_metadata, logger)) { + // error logged in ValidateSliceComputeMetadataForCoreML return false; } diff --git a/onnxruntime/core/providers/coreml/builders/model_builder.cc b/onnxruntime/core/providers/coreml/builders/model_builder.cc index eec0fcce51dbc..9668bfcd09adf 100644 --- a/onnxruntime/core/providers/coreml/builders/model_builder.cc +++ 
b/onnxruntime/core/providers/coreml/builders/model_builder.cc @@ -839,6 +839,13 @@ Status ModelBuilder::RegisterModelInputOutput(const NodeArg& node_arg, bool is_i if (is_input) { // the model inputs need to be wired up as args to the 'main' function. auto tensor_value_type = CreateNamedTensorValueType(node_arg, /*convert_scalar*/ true); + + // we need to convert int64 to int32 here as well + if (data_type == ONNX_NAMESPACE::TensorProto_DataType_INT64) { + tensor_value_type.mutable_type()->mutable_tensortype()->set_datatype( + OnnxDataTypeToMILSpec(ONNX_NAMESPACE::TensorProto_DataType_INT32)); + } + tensor_value_type.set_name(name); mlprogram_main_fn_->mutable_inputs()->Add(std::move(tensor_value_type)); diff --git a/onnxruntime/core/providers/coreml/builders/model_builder.h b/onnxruntime/core/providers/coreml/builders/model_builder.h index 385588dbfdcb8..bb791fb902908 100644 --- a/onnxruntime/core/providers/coreml/builders/model_builder.h +++ b/onnxruntime/core/providers/coreml/builders/model_builder.h @@ -121,6 +121,13 @@ class ModelBuilder { return AddConstant(op_type, value_type, AsSpan(value), shape); } + // helper to convert a span of non-const data to const + template + std::string_view AddConstant(std::string_view op_type, std::string_view value_type, gsl::span value, + std::optional> shape = std::nullopt) { + return AddConstant(op_type, value_type, gsl::span(value), shape); + } + /// /// Add a scalar value as a 'const' operation. See AddConstant for details. 
/// diff --git a/onnxruntime/test/providers/cpu/tensor/slice_op.test.cc b/onnxruntime/test/providers/cpu/tensor/slice_op.test.cc index af54ae96ef86b..83b308b57f26b 100644 --- a/onnxruntime/test/providers/cpu/tensor/slice_op.test.cc +++ b/onnxruntime/test/providers/cpu/tensor/slice_op.test.cc @@ -90,7 +90,7 @@ void RunSliceTest(const std::vector& input_dims, run_test(false); - // NNAPI EP requires the starts/ends/axes/steps be initializers + // EPs like NNAPI and CoreML require the starts/ends/axes/steps be initializers run_test(true); } diff --git a/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md b/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md index 3b3790ba06599..c33184686c932 100644 --- a/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md +++ b/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md @@ -18,6 +18,7 @@ Keep in sync with doco generated from /docs/execution-providers/CoreML-Execution |ai.onnx:Relu|| |ai.onnx:Reshape|| |ai.onnx:Resize|See [resize_op_builder.cc](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/core/providers/coreml/builders/impl/resize_op_builder.cc) implementation. There are too many permutations to describe the valid combinations.| +|ai.onnx:Slice|starts/ends/axes/steps must be constant initializers.| |ai.onnx:Sub|| |ai.onnx:Sigmoid|| |ai:onnx:Tanh||