CoreML: Aggregated changes to add all required ops for priority model (#21472)

### Description
These changes are combined into one PR to simplify check-in:
- Add Concat (#21423)
- Add DepthToSpace (#21426)
- Add LeakyRelu (#21453)
- Add test scripts (#21427)
- Add ability to set CoreML flags from Python (#21434); a C++ analogue is sketched below
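
For context, a minimal C++ sketch of the equivalent flag-setting path. This uses the pre-existing C API entry point rather than the new Python binding, and header names/locations may vary by package:

```cpp
#include <cstdint>

#include "onnxruntime_cxx_api.h"
#include "coreml_provider_factory.h"  // COREML_FLAG_* constants (location may vary)

int main() {
  Ort::SessionOptions session_options;

  // Request an ML Program model rather than a NeuralNetwork one.
  const uint32_t coreml_flags = COREML_FLAG_CREATE_MLPROGRAM;
  Ort::ThrowOnError(
      OrtSessionOptionsAppendExecutionProvider_CoreML(session_options, coreml_flags));

  return 0;
}
```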


Other changes:
- Updated partitioning utils to support dropping constant initializers from a ComputeCapability's inputs (sketched below).
  - Noticed that the list of inputs to the CoreML model was unexpectedly long because of this.
  - We copy constant initializers into the CoreML model, so the originals are no longer needed; if they remain as inputs, ORT can't free them because they appear to be in use.
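
A rough sketch of the partitioning-utils idea, with hypothetical names (`SubGraphDef` and the `is_constant_initializer` callback are illustrative stand-ins; the real change lives in ORT's partitioning utils):

```cpp
#include <algorithm>
#include <functional>
#include <string>
#include <vector>

// Hypothetical stand-in for a ComputeCapability's input list.
struct SubGraphDef {
  std::vector<std::string> inputs;
};

// Drop inputs that are constant initializers. In ORT the predicate would
// consult the Graph's initialized tensors; here it is an assumed callback.
void DropConstantInitializerInputs(
    SubGraphDef& sub_graph,
    const std::function<bool(const std::string&)>& is_constant_initializer) {
  auto& inputs = sub_graph.inputs;
  inputs.erase(std::remove_if(inputs.begin(), inputs.end(), is_constant_initializer),
               inputs.end());
}
```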

skottmckay authored Jul 25, 2024
1 parent 3cdf4b9 commit b0e1f7f
Showing 27 changed files with 783 additions and 137 deletions.
1 change: 1 addition & 0 deletions .lintrunner.toml
@@ -137,6 +137,7 @@ exclude_patterns = [
'onnxruntime/core/mickey/gemm/**', # CUTLASS based libs recommends NO automatic code formatting
'winml/lib/Api.Image/shaders/**', # Contains data chunks
'onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_launch_template.h', # Bool Switches hang Clang
'onnxruntime/core/providers/coreml/mlprogram_test_scripts/**', # test scripts only
]
command = [
'python',
24 changes: 15 additions & 9 deletions include/onnxruntime/core/graph/graph.h
@@ -1408,6 +1408,11 @@ class Graph {  // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
RuntimeOptimizationRecordContainer& MutableRuntimeOptimizations() {
return runtime_optimizations_;
}

// We don't run Graph::Resolve() on an ORT format model, but a compiling EP may copy initializers to its
// compiled model during partitioning, leaving them unused in the ORT Graph. To allow the memory to be freed
// we need to manually run the cleanup that would usually happen as part of Graph::Resolve.
Status RemovedUnusedInitializersOrtFormat();
#endif // !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)

// This friendship relationship should only be used to call Graph::Graph and
@@ -1541,12 +1546,6 @@ class Graph {  // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi

common::Status PerformTypeAndShapeInferencing(const ResolveOptions& options);

// Recursively find all subgraphs including nested subgraphs
void FindAllSubgraphs(std::vector<Graph*>& subgraphs);

// Iterate this Graph instance and all subgraphs, calling the provided function for each.
common::Status ForThisAndAllSubgraphs(const std::vector<Graph*>& subgraphs, std::function<Status(Graph&)> func);

common::Status InferAndVerifyTypeMatch(Node& node, const ONNX_NAMESPACE::OpSchema& op, const ResolveOptions& options);

// perform type and shape inferencing on the subgraph and Resolve to validate
@@ -1576,9 +1575,6 @@ class Graph {  // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
// Implementation for initializer replacement
Status ReplaceInitializedTensorImpl(ONNX_NAMESPACE::TensorProto new_initializer, bool is_external);

// Clear all unused initializers and NodeArgs
void CleanUnusedInitializersAndNodeArgs(const std::unordered_set<std::string>* initializer_names_to_preserve = nullptr);

std::vector<NodeArg*> CreateNodeArgs(const google::protobuf::RepeatedPtrField<std::string>& names,
const ArgNameToTypeMap& name_to_type_map);

@@ -1587,6 +1583,16 @@ class Graph {
#endif // !defined(ORT_MINIMAL_BUILD)

#if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)

// Recursively find all subgraphs including nested subgraphs
void FindAllSubgraphs(std::vector<Graph*>& subgraphs);

// Iterate this Graph instance and all subgraphs, calling the provided function for each.
common::Status ForThisAndAllSubgraphs(const std::vector<Graph*>& subgraphs, std::function<Status(Graph&)> func);

// Clear all unused initializers and NodeArgs
void CleanUnusedInitializersAndNodeArgs(const std::unordered_set<std::string>* initializer_names_to_preserve = nullptr);

Status PopulateNodeArgToProducerConsumerLookupsFromNodes();

template <typename TInstance>
60 changes: 37 additions & 23 deletions onnxruntime/core/graph/graph.cc
@@ -3254,27 +3254,6 @@ Status Graph::PerformTypeAndShapeInferencing(const ResolveOptions& options) {
return Status::OK();
}

void Graph::FindAllSubgraphs(std::vector<Graph*>& subgraphs) {
for (auto& node : Nodes()) {
for (auto& subgraph : node.MutableSubgraphs()) {
subgraphs.push_back(subgraph.get());
subgraph->FindAllSubgraphs(subgraphs);
}
}
}

Status Graph::ForThisAndAllSubgraphs(const std::vector<Graph*>& subgraphs, std::function<Status(Graph&)> func) {
auto status = func(*this);
ORT_RETURN_IF_ERROR(status);

for (auto& subgraph : subgraphs) {
status = func(*subgraph);
ORT_RETURN_IF_ERROR(status);
}

return status;
}

Status Graph::Resolve(const ResolveOptions& options) {
if (parent_graph_) {
// Resolve must start at the top level graph in-order to handle outer scope
@@ -3387,6 +3366,39 @@ void Graph::AddInitializedTensor(const TensorProto& tensor) {
ORT_IGNORE_RETURN_VALUE(GetOrCreateNodeArg(tensor.name(), &t));
}
}

void Graph::FindAllSubgraphs(std::vector<Graph*>& subgraphs) {
for (auto& node : Nodes()) {
for (auto& subgraph : node.MutableSubgraphs()) {
subgraphs.push_back(subgraph.get());
subgraph->FindAllSubgraphs(subgraphs);
}
}
}

Status Graph::ForThisAndAllSubgraphs(const std::vector<Graph*>& subgraphs, std::function<Status(Graph&)> func) {
auto status = func(*this);
ORT_RETURN_IF_ERROR(status);

for (auto& subgraph : subgraphs) {
status = func(*subgraph);
ORT_RETURN_IF_ERROR(status);
}

return status;
}

Status Graph::RemovedUnusedInitializersOrtFormat() {
std::vector<Graph*> all_subgraphs;
FindAllSubgraphs(all_subgraphs);
auto cleanup_func = [](Graph& graph) {
graph.CleanUnusedInitializersAndNodeArgs(nullptr);
return Status::OK();
};

auto result = ForThisAndAllSubgraphs(all_subgraphs, cleanup_func);
return result;
}
#endif // !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)

const std::string& Graph::Name() const noexcept {
@@ -4122,6 +4134,9 @@ void Graph::ToGraphProtoInternal(ONNX_NAMESPACE::GraphProto& graph_proto) const
}
}

#endif // !defined(ORT_MINIMAL_BUILD)

#if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)
void Graph::CleanUnusedInitializersAndNodeArgs(const std::unordered_set<std::string>* initializer_names_to_preserve) {
// Node Args being used
std::unordered_set<const NodeArg*> used_args;
@@ -4253,8 +4268,7 @@ void Graph::CleanUnusedInitializersAndNodeArgs(const std::unordered_set<std::str
}
}
}

#endif // !defined(ORT_MINIMAL_BUILD)
#endif // !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)

void Graph::ComputeOverridableInitializers() {
graph_overridable_initializers_.clear();
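A hedged sketch, not code from this commit, of how the new cleanup above might be invoked once a compiling EP (e.g. CoreML) has copied constant initializers into its compiled model, so ORT can free the now-unused originals:

```cpp
#include "core/common/common.h"  // ORT_RETURN_IF_ERROR
#include "core/graph/graph.h"

namespace onnxruntime {

common::Status CleanupAfterCompile(Graph& graph) {
  // Graph::Resolve() is not run for ORT format models, so trigger the
  // unused-initializer cleanup manually after partitioning.
  ORT_RETURN_IF_ERROR(graph.RemovedUnusedInitializersOrtFormat());
  return common::Status::OK();
}

}  // namespace onnxruntime
```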
@@ -83,19 +83,30 @@ Status ActivationOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
using namespace CoreML::Specification::MILSpec;
// https://apple.github.io/coremltools/source/coremltools.converters.mil.mil.ops.defs.html#module-coremltools.converters.mil.mil.ops.defs.iOS15.activation
std::string_view coreml_op_type;
bool add_alpha = false;
if (op_type == "Sigmoid") {
coreml_op_type = "sigmoid";
} else if (op_type == "Tanh") {
coreml_op_type = "tanh";
} else if (op_type == "Relu") {
coreml_op_type = "relu";
} else if (op_type == "LeakyRelu") {
coreml_op_type = "leaky_relu";
add_alpha = true;
} else {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"ActivationOpBuilder::AddToModelBuilderImpl, unknown op: ", op_type);
}

std::unique_ptr<Operation> op = model_builder.CreateOperation(node, coreml_op_type);
AddOperationInput(*op, "x", node.InputDefs()[0]->Name());

if (add_alpha) {
NodeAttrHelper helper(node);
const auto alpha = helper.Get("alpha", 0.01f);
AddOperationInput(*op, "alpha", model_builder.AddScalarConstant(op->type(), "alpha", alpha));
}

AddOperationOutput(*op, *node.OutputDefs()[0]);

model_builder.AddOperation(std::move(op));
@@ -198,7 +209,7 @@ bool ActivationOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInp

#if defined(COREML_ENABLE_MLPROGRAM)
if (input_params.create_mlprogram) {
if (op_type == "PRelu" || op_type == "LeakyRelu") {
if (op_type == "PRelu") { // TODO: ML Program supports this so should be easy to enable
return false;
}
} else
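For reference, the element-wise semantics being mapped above; the 0.01f default matches the ONNX `alpha` default used in the diff. This is a reference-only sketch, not the CoreML implementation:

```cpp
// Reference semantics of LeakyRelu; CoreML's leaky_relu receives the same
// alpha via the scalar constant added above.
float LeakyRelu(float x, float alpha = 0.01f) {
  return x >= 0.0f ? x : alpha * x;
}
```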
24 changes: 23 additions & 1 deletion onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc
@@ -309,11 +309,33 @@ COREML_SPEC::MILSpec::NamedValueType CreateNamedTensorValueType(const NodeArg& n

void AddOperationInput(MILSpec::Operation& op, std::string_view input_name, std::string_view value_name) {
MILSpec::Argument arg;
arg.mutable_arguments()->Add()->set_name(std::string(value_name));
arg.mutable_arguments()->Add()->set_name(value_name.data(), value_name.size());

(*op.mutable_inputs())[input_name] = std::move(arg);
}

void AddOperationVariadicInput(MILSpec::Operation& op, std::string_view input_name,
const std::vector<std::string_view>& value_names) {
MILSpec::Argument arg;
for (const auto& value : value_names) {
arg.mutable_arguments()->Add()->set_name(value.data(), value.size());
}

(*op.mutable_inputs())[input_name] = std::move(arg);
}

void AddIntermediateOperationOutput(COREML_SPEC::MILSpec::Operation& op, std::string_view output_name,
int32_t element_type, std::optional<gsl::span<const int64_t>> shape) {
auto& outputs = *op.mutable_outputs();
auto& output_arg = *outputs.Add();
output_arg.set_name(output_name.data(), output_name.size());

MILSpec::ValueType& value = *output_arg.mutable_type();
MILSpec::TensorType& tensor_type = *value.mutable_tensortype();

SetTensorTypeInfo(tensor_type, OnnxDataTypeToMILSpec(element_type), shape, /*convert_scalar*/ true);
}

void AddOperationOutput(COREML_SPEC::MILSpec::Operation& op, const NodeArg& output,
std::optional<int32_t> override_element_type) {
auto& outputs = *op.mutable_outputs();
20 changes: 20 additions & 0 deletions onnxruntime/core/providers/coreml/builders/impl/builder_utils.h
@@ -129,6 +129,26 @@ COREML_SPEC::MILSpec::NamedValueType CreateNamedTensorValueType(const NodeArg& n
void AddOperationInput(COREML_SPEC::MILSpec::Operation& op,
std::string_view input_name, std::string_view value_name);

/// <summary>
/// Add a variadic input argument to a MILSpec::Operation
/// </summary>
/// <param name="op">Operation to update.</param>
/// <param name="input name">The input name defined by the spec for the operation. </param>
/// <param name="value_names">The input value names.</param>
void AddOperationVariadicInput(COREML_SPEC::MILSpec::Operation& op, std::string_view input_name,
const std::vector<std::string_view>& value_names);

/// <summary>
/// Add an output to a MILSpec::Operation for an intermediate operation when the implementation is composed of
/// multiple MLProgram operations. In this case we don't have a NodeArg for the output.
/// </summary>
/// <param name="op">Operation to update.</param>
/// <param name="output_name">Name of the intermediate output. Create using ModelBuilder::GetUniqueName.</param>
/// <param name="element_type">onnx::TensorProto_DataType element type of the output.
/// int32_t as that is what TensorShapeProto uses to store the value.</param>
/// <param name="shape">Shape of the output if known.</param>
void AddIntermediateOperationOutput(COREML_SPEC::MILSpec::Operation& op, std::string_view output_name,
int32_t element_type, std::optional<gsl::span<const int64_t>> shape);

/// <summary>
/// Add an output to a MILSpec::Operation. Name, data type and shape are used from the NodeArg.
/// </summary>
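A hypothetical usage fragment for the intermediate-output helper, chaining two MIL ops inside an op builder's AddToModelBuilderImpl. The op names and the `GetUniqueName` call pattern are assumptions, not code from this PR:

```cpp
// First op produces an intermediate tensor that has no NodeArg, so it is
// named explicitly; the second op consumes it and emits the real output.
std::unique_ptr<Operation> op0 = model_builder.CreateOperation(node, "expand_dims");
AddOperationInput(*op0, "x", node.InputDefs()[0]->Name());

const std::string intermediate_name = model_builder.GetUniqueName(node, "expanded");
AddIntermediateOperationOutput(*op0, intermediate_name,
                               ONNX_NAMESPACE::TensorProto_DataType_FLOAT,
                               std::nullopt);  // shape not known here
model_builder.AddOperation(std::move(op0));

std::unique_ptr<Operation> op1 = model_builder.CreateOperation(node, "squeeze");
AddOperationInput(*op1, "x", intermediate_name);
AddOperationOutput(*op1, *node.OutputDefs()[0]);
model_builder.AddOperation(std::move(op1));
```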
@@ -4,6 +4,7 @@
#include "core/providers/common.h"
#include "core/providers/coreml/builders/helper.h"
#include "core/providers/coreml/builders/impl/base_op_builder.h"
#include "core/providers/coreml/builders/impl/builder_utils.h"
#include "core/providers/coreml/builders/model_builder.h"
#include "core/providers/coreml/builders/op_builder_factory.h"
#include "core/providers/coreml/shape_utils.h"
@@ -18,27 +19,51 @@ class ConcatOpBuilder : public BaseOpBuilder {

bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
const logging::Logger& logger) const override;

bool SupportsMLProgram() const override { return true; }
};

Status ConcatOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
const Node& node,
const logging::Logger& logger) const {
std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);

layer->mutable_concat()->set_sequenceconcat(false);

for (const auto* input : node.InputDefs()) {
LOGS(logger, VERBOSE) << "input name " << input->Name();
*layer->mutable_input()->Add() = input->Name();
#if defined(COREML_ENABLE_MLPROGRAM)
if (model_builder.CreateMLProgram()) {
using namespace CoreML::Specification::MILSpec; // NOLINT

NodeAttrHelper helper(node);
const auto axis = helper.GetInt64("axis"); // required
const auto interleave = false;

std::unique_ptr<Operation> op = model_builder.CreateOperation(node, "concat");
std::vector<std::string_view> input_names;
for (const auto* input : node.InputDefs()) {
input_names.emplace_back(input->Name());
}
AddOperationVariadicInput(*op, "values", input_names);
AddOperationInput(*op, "axis", model_builder.AddScalarConstant(op->type(), "axis", *axis));
AddOperationInput(*op, "interleave", model_builder.AddScalarConstant(op->type(), "interleave", interleave));
AddOperationOutput(*op, *node.OutputDefs()[0]);
model_builder.AddOperation(std::move(op));
} else // NOLINT
#endif // defined(COREML_ENABLE_MLPROGRAM)
{
std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);

layer->mutable_concat()->set_sequenceconcat(false);

for (const auto* input : node.InputDefs()) {
LOGS(logger, VERBOSE) << "input name " << input->Name();
*layer->mutable_input()->Add() = input->Name();
}

*layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();

model_builder.AddLayer(std::move(layer));
}

*layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();

model_builder.AddLayer(std::move(layer));
return Status::OK();
}

bool ConcatOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& /* input_params */,
bool ConcatOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
const logging::Logger& logger) const {
const auto& input_defs = node.InputDefs();
if (input_defs.size() < 2) {
@@ -50,23 +75,25 @@ bool ConcatOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputPa
if (!GetShape(*input_defs[0], input_shape, logger))
return false;

auto rank = input_shape.size();
if (rank != 4) {
// For some reason, the concat in CoreML running on 3d tensor will concat on wrong axis
// Instead of concat on axis 0, it will concat on axis 1
// Disable Concat support for 3d tensor for now
// TODO, add ExpandDims and Squeeze, 3d -ExpandDims-> 4d -> Concat -Squeeze-> 3d
LOGS(logger, VERBOSE) << "Concat only support 4d shape for now, input is "
<< rank << "d shape";
return false;
}

NodeAttrHelper helper(node);
auto axis = static_cast<size_t>(HandleNegativeAxis(helper.Get("axis", 1), rank));
if (rank != axis + 3) {
LOGS(logger, VERBOSE) << "Concat only support axis to be -3, actual axis: " << axis
<< ", actual rank: " << rank;
return false;
if (!input_params.create_mlprogram) {
auto rank = input_shape.size();
if (rank != 4) {
// For some reason, the concat in CoreML running on 3d tensor will concat on wrong axis
// Instead of concat on axis 0, it will concat on axis 1
// Disable Concat support for 3d tensor for now
// TODO: add ExpandDims and Squeeze, 3d -ExpandDims-> 4d -> Concat -Squeeze-> 3d
LOGS(logger, VERBOSE) << "Concat only support 4d shape for now, input is "
<< rank << "d shape";
return false;
}

NodeAttrHelper helper(node);
auto axis = static_cast<size_t>(HandleNegativeAxis(helper.Get("axis", 1), rank));
if (rank != axis + 3) {
LOGS(logger, VERBOSE) << "Concat only support axis to be -3, actual axis: " << axis
<< ", actual rank: " << rank;
return false;
}
}

return true;
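For reference, a worked check of the NeuralNetwork-path axis restriction kept above; the semantics of `HandleNegativeAxis` are assumed from its use here:

```cpp
// Assumed semantics of HandleNegativeAxis as used in the check above.
int64_t HandleNegativeAxisRef(int64_t axis, int64_t rank) {
  return axis < 0 ? axis + rank : axis;
}

// With rank == 4, the condition `rank != axis + 3` only passes for axis == 1,
// i.e. ONNX axis -3 (or equivalently 1). The new ML Program path above skips
// this restriction entirely.
```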