Skip to content

Commit

Permalink
[CoreML] Add Softmax and Split op support (microsoft#18358)
Browse files Browse the repository at this point in the history
### Description
<!-- Describe your changes. -->

As title.

### Motivation and Context
<!-- - Why is this change required? What problem does it solve?
- If it fixes an open issue, please link to the issue here. -->

Adds the operator support that was missing for the yolov8 model.
microsoft#17654

Now the model support info looks like:
 
_CoreMLExecutionProvider::GetCapability, number of partitions supported
by CoreML: 3 number of nodes in the graph: 233 number of nodes supported
by CoreML: 230_

(Only 3 Concat ops are still unsupported, because 3-D input shapes are not
currently supported by the CoreML EP Concat implementation.)

---------

Co-authored-by: rachguo <[email protected]>
Co-authored-by: rachguo <[email protected]>
Co-authored-by: Edward Chen <[email protected]>
  • Loading branch information
4 people authored and kleiti committed Mar 22, 2024
1 parent 6b77262 commit e4a017d
Show file tree
Hide file tree
Showing 9 changed files with 394 additions and 7 deletions.
128 changes: 128 additions & 0 deletions onnxruntime/core/providers/coreml/builders/impl/softmax_op_builder.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include "core/providers/coreml/builders/impl/base_op_builder.h"

#include "core/framework/tensorprotoutils.h"
#include "core/providers/common.h"
#include "core/providers/coreml/shape_utils.h"
#include "core/providers/shared/utils/utils.h"

#ifdef __APPLE__
#include "core/providers/coreml/builders/model_builder.h"
#endif
#include "core/providers/coreml/builders/op_builder_factory.h"

namespace onnxruntime {
namespace coreml {

// Builds the CoreML NeuralNetwork layer(s) for the ONNX Softmax operator.
// Opset 13+ maps directly onto CoreML SoftmaxND; older opsets coerce the input
// to 2-D first (see AddToModelBuilderImpl).
class SoftmaxOpBuilder : public BaseOpBuilder {
  // Add operator related
#ifdef __APPLE__
 private:
  // Emits the CoreML layer(s) implementing this node into the model builder.
  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
                               const logging::Logger& logger) const override;
#endif

  // Operator support related
 private:
  // Returns true if this Softmax node can be handled by the CoreML EP
  // (requires a static, non-empty input shape).
  bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
                         const logging::Logger& logger) const override;
};

// Add operator related

#ifdef __APPLE__

// Translates an ONNX Softmax node into CoreML layers.
// Opset 13+: a single SoftmaxND layer with the node's axis.
// Opset <13: ONNX semantics flatten the input to 2-D as
// [prod(dims[0:axis]), prod(dims[axis:])] and apply softmax over the second
// dimension, so we emit Reshape -> SoftmaxND(axis=-1) -> Reshape-back.
Status SoftmaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
                                               const Node& node,
                                               const logging::Logger& logger) const {
  std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = CreateNNLayer(model_builder, node);
  const auto& input_name = node.InputDefs()[0]->Name();
  const auto& output_name = node.OutputDefs()[0]->Name();

  std::vector<int64_t> data_shape;
  // Static shape is required: the reshape path below bakes concrete sizes into the model.
  ORT_RETURN_IF_NOT(GetStaticShape(*node.InputDefs()[0], data_shape, logger), "Failed to get input shape.");

  NodeAttrHelper helper(node);
  // ONNX default axis changed across opsets: 1 for opset <13, -1 for opset 13+.
  int32_t axis_default_value = (node.SinceVersion() < 13) ? 1 : -1;
  const auto axis = helper.Get("axis", axis_default_value);
  const auto axis_nonnegative = HandleNegativeAxis(axis, data_shape.size());

  // NOTE(review): for opset <13 with a 2-D input this branch forwards `axis`
  // directly to SoftmaxND. For axis == 1 (the common/default case) that matches
  // the ONNX coercion semantics; for axis == 0 the coerced shape would be
  // [1, d0*d1] (softmax over all elements), which SoftmaxND(axis=0) presumably
  // does not reproduce — confirm whether axis == 0, opset <13, 2-D inputs can
  // reach this path.
  if (node.SinceVersion() >= 13 || (data_shape.size() == 2)) {
    auto* coreml_softmaxnd = layer->mutable_softmaxnd();
    coreml_softmaxnd->set_axis(axis);
    *layer->mutable_input()->Add() = input_name;
    *layer->mutable_output()->Add() = output_name;
    model_builder.AddLayer(std::move(layer));
  } else {
    // note: if opsets < 13, onnx Softmax coerces the input shape to be 2D based on axis.
    // we need to manually reshape to 2D and apply SoftmaxND to axis -1 to achieve equivalent results for CoreML.
    TensorShape input_shape(data_shape);
    const auto size_to_dimension = input_shape.SizeToDimension(axis_nonnegative);
    const auto size_from_dimension = input_shape.SizeFromDimension(axis_nonnegative);

    // Target 2-D shape: [prod(dims before axis), prod(dims from axis onward)].
    TensorShapeVector target_shape;
    target_shape.push_back(size_to_dimension);
    target_shape.push_back(size_from_dimension);

    const auto reshape1_output_name = model_builder.GetUniqueName(MakeString(node.Name(), "reshape1_output"));
    {  // Add reshape layer
      const auto softmax_reshape1_layer_name =
          model_builder.GetUniqueName(MakeString(node.Name(), "_Softmax_reshape1"));
      auto reshape_layer = CreateNNLayer(softmax_reshape1_layer_name);
      *reshape_layer->mutable_reshapestatic()->mutable_targetshape() = {target_shape.cbegin(), target_shape.cend()};
      *reshape_layer->mutable_input()->Add() = input_name;
      *reshape_layer->mutable_output()->Add() = reshape1_output_name;
      model_builder.AddLayer(std::move(reshape_layer));
    }
    const auto softmax_output_name = model_builder.GetUniqueName(MakeString(node.Name(), "softmax_output"));
    {
      // Softmax over the flattened trailing dimension (axis -1 of the 2-D shape).
      auto* coreml_softmaxnd = layer->mutable_softmaxnd();
      coreml_softmaxnd->set_axis(-1);
      *layer->mutable_input()->Add() = reshape1_output_name;
      *layer->mutable_output()->Add() = softmax_output_name;
      model_builder.AddLayer(std::move(layer));
    }
    {
      // Add reshape back layer
      const auto softmax_reshape2_layer_name =
          model_builder.GetUniqueName(MakeString(node.Name(), "_Softmax_reshape2"));
      auto reshape_layer = CreateNNLayer(softmax_reshape2_layer_name);
      *reshape_layer->mutable_reshapestatic()->mutable_targetshape() = {data_shape.cbegin(), data_shape.cend()};
      *reshape_layer->mutable_input()->Add() = softmax_output_name;
      *reshape_layer->mutable_output()->Add() = output_name;
      model_builder.AddLayer(std::move(reshape_layer));
    }
  }

  return Status::OK();
}

#endif

// Operator support related

// A Softmax node is supported when its input has a fully static shape with at
// least one element; the builder bakes concrete sizes into reshape layers.
bool SoftmaxOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& /* input_params */,
                                         const logging::Logger& logger) const {
  std::vector<int64_t> static_dims;
  if (!GetStaticShape(*node.InputDefs()[0], static_dims, logger)) {
    return false;
  }

  const TensorShape input_shape{static_dims};
  if (input_shape.Size() == 0) {
    LOGS(logger, VERBOSE) << "Empty input data is not supported.";
    return false;
  }

  return true;
}

// Registers a SoftmaxOpBuilder instance and maps the given op type to it.
void CreateSoftmaxOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
  auto builder = std::make_unique<SoftmaxOpBuilder>();
  op_registrations.op_builder_map.emplace(op_type, builder.get());
  op_registrations.builders.push_back(std::move(builder));
}

} // namespace coreml
} // namespace onnxruntime
189 changes: 189 additions & 0 deletions onnxruntime/core/providers/coreml/builders/impl/split_op_builder.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include "core/providers/coreml/builders/impl/base_op_builder.h"

#include "core/optimizer/initializer.h"
#include "core/providers/common.h"
#include "core/providers/coreml/builders/helper.h"
#include "core/providers/coreml/builders/op_builder_factory.h"
#include "core/providers/coreml/shape_utils.h"
#include "core/providers/shared/utils/utils.h"

#if defined(__APPLE__)
#include "core/providers/coreml/builders/model_builder.h"
#endif

namespace onnxruntime {
namespace coreml {

// Builds the CoreML SplitND layer for the ONNX Split operator.
// Supports opset 13+ only; uneven splits are expressed via explicit splitsizes.
class SplitOpBuilder : public BaseOpBuilder {
  // Add operator related
#ifdef __APPLE__
 private:
  // Marks the optional 'split' initializer input as consumed at build time so
  // it is not added to the CoreML model as a separate tensor.
  void AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const override;

 private:
  // Emits the CoreML SplitND layer implementing this node.
  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
                               const logging::Logger& logger) const override;
#endif

  // Operator support related
 private:
  // Validates shape/axis/'split'/'num_outputs' constraints for CoreML SplitND.
  bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
                         const logging::Logger& logger) const override;

  // Split opset 13- uses "split" as attribute. Currently it's not supported.
  int GetMinSupportedOpSet(const Node& /* node */) const override { return 13; }
};

// Add operator related

#ifdef __APPLE__

// The optional second input 'split' is read directly while building the layer,
// so its initializer never needs to appear in the generated CoreML model.
void SplitOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const {
  const auto& input_defs = node.InputDefs();

  const bool has_split_input = input_defs.size() > 1 && input_defs[1]->Exists();
  if (has_split_input) {  // optional second input "split"
    model_builder.AddInitializerToSkip(input_defs[1]->Name());
  }
}

// Translates an ONNX Split node into a single CoreML SplitND layer.
// Output sizes come from (in priority order): the 'split' input tensor,
// the output count (opset <18), or the 'num_outputs' attribute (opset 18+).
Status SplitOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
                                             const Node& node,
                                             const logging::Logger& logger) const {
  const auto& input_defs = node.InputDefs();

  std::vector<int64_t> data_shape;
  ORT_RETURN_IF_NOT(GetShape(*node.InputDefs()[0], data_shape, logger), "Failed to get input shape.");

  NodeAttrHelper helper(node);
  const auto axis = helper.Get("axis", 0);

  // attribute introduced since opset 18
  // Assigned exactly once in each of the three branches below.
  uint64_t num_outputs;

  std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = CreateNNLayer(model_builder, node);
  auto* coreml_splitnd = layer->mutable_splitnd();
  coreml_splitnd->set_axis(axis);

  if (input_defs.size() > 1) {
    // if "split" is explicitly provided as an input
    const auto& split_tensor = *model_builder.GetInitializerTensors().at(input_defs[1]->Name());
    Initializer unpacked_tensor(split_tensor);
    // NOTE(review): the ONNX 'split' input is declared int64; reading it as
    // uint64_t presumably relies on a same-width reinterpretation and the
    // values being non-negative — confirm DataAsSpan<uint64_t> is valid here.
    auto split_span = unpacked_tensor.DataAsSpan<uint64_t>();
    auto split_sizes = split_span.size();
    num_outputs = narrow<uint64_t>(split_sizes);
    for (size_t i = 0; i < split_sizes; i++) {
      coreml_splitnd->add_splitsizes(split_span[i]);
    }
  } else if (node.SinceVersion() < 18) {
    // Pre-opset-18 with no 'split' input: even split across the declared outputs.
    num_outputs = narrow<uint64_t>(node.OutputDefs().size());
    coreml_splitnd->set_numsplits(num_outputs);
  } else {
    // note: for opset 18+ 'num_outputs' is a required attribute
    num_outputs = narrow<uint64_t>(helper.GetInt("num_outputs").value());
    // note: checked in IsOpSupportedImpl that ensures the dim value at splitting axis exists
    auto split_dim_size = data_shape[HandleNegativeAxis(axis, data_shape.size())];
    // ONNX semantics for uneven num_outputs splits: ceil(dim/num_outputs)-sized
    // chunks, with the last chunk holding the remainder.
    uint64_t chunk_size = narrow<uint64_t>((split_dim_size + num_outputs - 1) / num_outputs);
    uint64_t remainder = split_dim_size % chunk_size;
    if (remainder) {
      // uneven
      auto split_sizes = InlinedVector<uint64_t>(num_outputs, chunk_size);
      split_sizes.back() = remainder;
      for (size_t i = 0; i < split_sizes.size(); i++) {
        coreml_splitnd->add_splitsizes(split_sizes[i]);
      }
    } else {
      // even
      coreml_splitnd->set_numsplits(num_outputs);
    }
  }

  *layer->mutable_input()->Add() = node.InputDefs()[0]->Name();
  // variadic number of outputs. Calculated based on the length of the given splitSizes if provided.
  // Otherwise, uses attribute value 'num_outputs'.
  for (uint64_t i = 0; i < num_outputs; i++) {
    *layer->mutable_output()->Add() = node.OutputDefs()[i]->Name();
  }
  model_builder.AddLayer(std::move(layer));

  return Status::OK();
}

#endif

// Operator support related

// Validates that this Split node satisfies CoreML SplitND's constraints:
// known (non-dynamic) dim at the split axis, a constant 1-D 'split' input with
// >= 2 positive entries summing to that dim, or (opset 18+) a valid
// 'num_outputs' attribute matching the declared outputs.
bool SplitOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
                                       const logging::Logger& logger) const {
  const auto& input_defs = node.InputDefs();
  const auto& initializers = input_params.graph_viewer.GetAllInitializedTensors();

  NodeAttrHelper helper(node);
  const auto axis = helper.Get("axis", 0);

  std::vector<int64_t> input_shape;
  if (!GetShape(*input_defs[0], input_shape, logger))
    return false;

  const auto split_dims_at_axis = input_shape[HandleNegativeAxis(axis, input_shape.size())];
  // Check the dynamic-dim condition up front: every validation below compares
  // against this value, so a -1 sentinel would produce misleading rejections.
  if (split_dims_at_axis == -1) {
    LOGS(logger, VERBOSE) << "Dim at the splitting axis is not allowed to be dynamic.";
    return false;
  }

  if (input_defs.size() > 1 && input_defs[1]->Exists()) {
    if (!CheckIsConstantInitializer(*input_defs[1], input_params.graph_viewer, logger, "'split'")) {
      return false;
    }
    // 'split' is a 1-D tensor whose LENGTH is the number of outputs.
    // Checking dim_size() here would test the rank (always 1) and reject every
    // model with an explicit 'split' input; check the dim-0 length instead.
    const auto& split_shape = *input_defs[1]->Shape();
    if (split_shape.dim_size() != 1 || split_shape.dim(0).dim_value() < 2) {
      LOGS(logger, VERBOSE) << "CoreML SplitND requires to produce at least 2 outputs.";
      return false;
    }
    const auto& splits_tensor = *initializers.at(input_defs[1]->Name());
    Initializer unpacked_tensor(splits_tensor);
    auto splits_span = unpacked_tensor.DataAsSpan<uint64_t>();
    // Accumulate in 64 bits; an int accumulator would truncate/overflow.
    const int64_t sum_of_splits = std::accumulate(splits_span.begin(), splits_span.end(), int64_t{0});
    if (sum_of_splits != split_dims_at_axis) {
      LOGS(logger, VERBOSE) << "Mismatch between the sum of 'split'. Expected: "
                            << split_dims_at_axis
                            << " Actual: "
                            << sum_of_splits;
      return false;
    }
    // Zero-sized chunks are not representable in CoreML SplitND.
    auto it = std::find(splits_span.begin(), splits_span.end(), 0);
    if (it != splits_span.end()) {
      LOGS(logger, VERBOSE) << "Invalid value in 'splits' input.";
      return false;
    }
  } else {
    if (node.SinceVersion() >= 18) {
      const auto num_outputs = helper.GetInt("num_outputs");
      if (!num_outputs.has_value()) {
        LOGS(logger, VERBOSE) << "No 'num_outputs' provided. For split 18+, num_outputs is a required attribute.";
        return false;
      }
      if (num_outputs.value() < 2) {
        LOGS(logger, VERBOSE) << "Invalid num_outputs. The value cannot be lower than 2.\n"
                              << "CoreML SplitND requires at least 2 outputs. num_outputs: " << num_outputs.value();
        return false;
      }
      // Must match the declared output count and fit within the axis dim.
      if (num_outputs.value() != static_cast<int32_t>(node.OutputDefs().size()) || num_outputs.value() > split_dims_at_axis) {
        LOGS(logger, VERBOSE) << "Invalid num_outputs provided.\n"
                              << "The value should be smaller or equal to the size of dimension being split. num_outputs: "
                              << num_outputs.value();
        return false;
      }
    }
  }
  return true;
}

// Registers a SplitOpBuilder instance and maps the given op type to it.
void CreateSplitOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
  auto builder = std::make_unique<SplitOpBuilder>();
  op_registrations.op_builder_map.emplace(op_type, builder.get());
  op_registrations.builders.push_back(std::move(builder));
}

} // namespace coreml
} // namespace onnxruntime
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,14 @@ static OpBuilderRegistrations CreateOpBuilderRegistrations() {
CreateSliceOpBuilder("Slice", op_registrations);
}

{ // Softmax
CreateSoftmaxOpBuilder("Softmax", op_registrations);
}

{ // Split
CreateSplitOpBuilder("Split", op_registrations);
}

return op_registrations;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ void CreateReshapeOpBuilder(const std::string& op_type, OpBuilderRegistrations&
void CreateResizeOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
void CreateShapeOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
void CreateSliceOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
void CreateSoftmaxOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
void CreateSplitOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
void CreateSqueezeOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
void CreateTransposeOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
void CreateUnaryOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
Expand Down
6 changes: 6 additions & 0 deletions onnxruntime/core/providers/shared/utils/utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,12 @@ std::vector<float> NodeAttrHelper::Get(const std::string& key, const std::vector
return std::vector<float>{source.cbegin(), source.cend()};
}

// Returns the node attribute's int64 value, or std::nullopt when the
// attribute is not present on the node.
std::optional<int64_t> NodeAttrHelper::GetInt(const std::string& key) const {
  if (HasAttr(key)) {
    return node_attributes_.at(key).i();
  }
  return std::nullopt;
}

bool NodeAttrHelper::HasAttr(const std::string& key) const {
return Contains(node_attributes_, key);
}
Expand Down
3 changes: 3 additions & 0 deletions onnxruntime/core/providers/shared/utils/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <cstdint>
#include <string>
#include <vector>
#include <optional>

#include "core/graph/basic_types.h"

Expand Down Expand Up @@ -57,6 +58,8 @@ class NodeAttrHelper {
uint32_t Get(const std::string& key, uint32_t def_val) const;
std::vector<uint32_t> Get(const std::string& key, const std::vector<uint32_t>& def_val) const;

std::optional<int64_t> GetInt(const std::string& key) const;

bool HasAttr(const std::string& key) const;

private:
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/test/providers/cpu/math/softmax_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ TEST(SoftmaxOperator, GH15949_regression_test) {
{0.00032932f, 0.01798029f, 0.9816904f});

// disable TRT as it does not support axis=0 as used by the model
tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kCoreMLExecutionProvider});
tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
}

} // namespace test
Expand Down
Loading

0 comments on commit e4a017d

Please sign in to comment.