From 724f9a13f1b89feb2e3c637caae4bac802602bdb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=B2=20Lucchesi?=
Date: Mon, 4 Sep 2023 17:35:18 +0200
Subject: [PATCH 01/14] fixed performance tuning link

---
 docs/FAQ.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/FAQ.md b/docs/FAQ.md
index e039f4e4e4160..a0973ca2c94ad 100644
--- a/docs/FAQ.md
+++ b/docs/FAQ.md
@@ -2,7 +2,7 @@
 Here are some commonly raised questions from users of ONNX Runtime and brought up in [Issues](https://github.com/microsoft/onnxruntime/issues).
 
 ## Do the GPU builds support quantized models?
-The default CUDA build supports 3 standard quantization operators: QuantizeLinear, DequantizeLinear, and MatMulInteger. The TensorRT EP has limited support for INT8 quantized ops. In general, support of quantized models through ORT is continuing to expand on a model-driven basis. For performance improvements, quantization is not always required, and we suggest trying alternative strategies to [performance tune](./ONNX_Runtime_Perf_Tuning.md) before determining that quantization is necessary.
+The default CUDA build supports 3 standard quantization operators: QuantizeLinear, DequantizeLinear, and MatMulInteger. The TensorRT EP has limited support for INT8 quantized ops. In general, support of quantized models through ORT is continuing to expand on a model-driven basis. For performance improvements, quantization is not always required, and we suggest trying alternative strategies to [performance tune](https://onnxruntime.ai/docs/performance/tune-performance/) before determining that quantization is necessary.
 
 ## How do I change the severity level of the default logger to something other than the default (WARNING)?
 Setting the severity level to VERBOSE is most useful when debugging errors.

From 9e68d5e52cc1bf570dcb2395ae59a0e13b09394f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=B2=20Lucchesi?=
Date: Mon, 4 Sep 2023 17:37:02 +0200
Subject: [PATCH 02/14] fixed python api link

---
 docs/FAQ.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/FAQ.md b/docs/FAQ.md
index a0973ca2c94ad..70bedbd02e944 100644
--- a/docs/FAQ.md
+++ b/docs/FAQ.md
@@ -8,7 +8,7 @@ The default CUDA build supports 3 standard quantization operators: QuantizeLinea
 Setting the severity level to VERBOSE is most useful when debugging errors.
 Refer to the API documentation:
-* Python - [RunOptions.log_severity_level](https://microsoft.github.io/onnxruntime/python/api_summary.html#onnxruntime.RunOptions.log_severity_level)
+* Python - [RunOptions.log_severity_level](https://onnxruntime.ai/docs/api/python/api_summary.html#onnxruntime.RunOptions.log_severity_level)
 ```
 import onnxruntime as ort
 ort.set_default_logger_severity(0)
 ```
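For readers using the C++ API, the same default-logger severity can be set when the environment is created. A minimal hedged sketch, not part of the patch (API names from onnxruntime_cxx_api.h; verify against your ORT version):

```objc
// Sketch: C++ equivalent of ort.set_default_logger_severity(0) above.
// ORT_LOGGING_LEVEL_VERBOSE corresponds to severity 0.
#include <onnxruntime_cxx_api.h>

int main() {
  Ort::Env env{ORT_LOGGING_LEVEL_VERBOSE, "faq-demo"};  // default logger severity
  Ort::RunOptions run_options;
  run_options.SetRunLogSeverityLevel(0);  // per-run severity, mirroring RunOptions.log_severity_level
  return 0;
}
```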
From 4e498dc718c21b78d2be1d01deafdef6a99c1aba Mon Sep 17 00:00:00 2001
From: nicolo-lucchesi
Date: Wed, 27 Sep 2023 22:48:40 +0200
Subject: [PATCH 03/14] support only contiguous model outputs

---
 onnxruntime/core/providers/coreml/model/model.mm | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/onnxruntime/core/providers/coreml/model/model.mm b/onnxruntime/core/providers/coreml/model/model.mm
index 60e0b1c061a43..1b7ecdc6ebc24 100644
--- a/onnxruntime/core/providers/coreml/model/model.mm
+++ b/onnxruntime/core/providers/coreml/model/model.mm
@@ -169,6 +169,14 @@ Status CreateInputFeatureProvider(const std::unordered_map<std::string, OnnxTen
                            ") do not match");
 }
 
+bool IsArrayContiguous(MLMultiArray* array) {
+  int64_t batch_stride = [array.strides[0] longLongValue];
+  const auto* shape = array.shape;
+  int64_t batch_elems = 1;
+  for (int i = 1; i < [shape count]; i++) batch_elems *= [shape[i] longLongValue];
+  return batch_stride == batch_elems;
+}
+
 }  // namespace
@@ -326,3 +334,5 @@ - (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
+      ORT_RETURN_IF_NOT(IsArrayContiguous(data),
+                        "Non contiguous output MLMultiArray are not currently supported");
       const void* model_output_buffer = data.dataPointer;
 
       if (model_output_buffer == nullptr) {
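The contiguity test added above treats the array as contiguous when the stride of the leading dimension equals the element count of the remaining dimensions. A standalone sketch of the same check (illustrative; `IsContiguous` is not a name from the patch):

```objc
#import <CoreML/CoreML.h>

// A contiguous layout means stride[0] == product of the trailing dimensions.
static bool IsContiguous(MLMultiArray* array) {
  int64_t lead_stride = array.strides[0].longLongValue;
  int64_t trailing_elems = 1;
  for (NSUInteger i = 1; i < array.shape.count; ++i) {
    trailing_elems *= array.shape[i].longLongValue;
  }
  return lead_stride == trailing_elems;
}

int main() {
  @autoreleasepool {
    NSError* error = nil;
    MLMultiArray* a = [[MLMultiArray alloc] initWithShape:@[ @2, @3, @4 ]
                                                 dataType:MLMultiArrayDataTypeFloat32
                                                    error:&error];
    NSLog(@"contiguous: %d", a != nil && IsContiguous(a));  // freshly allocated arrays are contiguous
  }
  return 0;
}
```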
From ce771708bb36605e7d6f794f9fec275a32ee088b Mon Sep 17 00:00:00 2001
From: nicolo-lucchesi
Date: Wed, 27 Sep 2023 23:42:41 +0200
Subject: [PATCH 04/14] replace MLMultiArray.dataPointer with
 getBytesWithHandler

---
 onnxruntime/core/providers/coreml/model/model.mm | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/onnxruntime/core/providers/coreml/model/model.mm b/onnxruntime/core/providers/coreml/model/model.mm
index 1b7ecdc6ebc24..8ded9df08c042 100644
--- a/onnxruntime/core/providers/coreml/model/model.mm
+++ b/onnxruntime/core/providers/coreml/model/model.mm
@@ -334,7 +334,10 @@ - (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
       ORT_RETURN_IF_NOT(IsArrayContiguous(data),
                         "Non contiguous output MLMultiArray are not currently supported");
-      const void* model_output_buffer = data.dataPointer;
+      __block const void* model_output_buffer=nil;
+      [data getBytesWithHandler:^(const void *bytes, NSInteger size) {
+        model_output_buffer = bytes;
+      }];
 
       if (model_output_buffer == nullptr) {
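`MLMultiArray.dataPointer` is deprecated in favor of `getBytesWithHandler:`, which hands the caller a pointer whose validity is scoped to the handler block. A hedged sketch of reading through the new API (requires macOS 12.3 / iOS 15.4; the function name is illustrative, not from the patch):

```objc
#import <CoreML/CoreML.h>

// Read the first float out of an MLMultiArray while the buffer pointer is
// guaranteed valid, i.e. inside the handler block.
static float FirstFloat(MLMultiArray* array) {
  __block float value = 0.0f;
  [array getBytesWithHandler:^(const void* bytes, NSInteger size) {
    if (size >= (NSInteger)sizeof(float)) {
      value = ((const float*)bytes)[0];
    }
  }];
  return value;
}
```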
From b2e8fdbf87822c960b91bfa9c058f815d3c990fa Mon Sep 17 00:00:00 2001
From: nicolo-lucchesi
Date: Sat, 30 Sep 2023 13:58:35 +0200
Subject: [PATCH 05/14] pr review changes

---
 onnxruntime/core/providers/coreml/model/model.mm | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/onnxruntime/core/providers/coreml/model/model.mm b/onnxruntime/core/providers/coreml/model/model.mm
index 8ded9df08c042..574adc20834db 100644
--- a/onnxruntime/core/providers/coreml/model/model.mm
+++ b/onnxruntime/core/providers/coreml/model/model.mm
@@ -333,8 +333,8 @@ - (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
       }
 
       ORT_RETURN_IF_NOT(IsArrayContiguous(data),
-                        "Non contiguous output MLMultiArray are not currently supported");
+                        "Non-contiguous output MLMultiArray is not currently supported");
       __block const void* model_output_buffer=nil;
       [data getBytesWithHandler:^(const void *bytes, NSInteger size) {
         model_output_buffer = bytes;

From 37e39e51664c4af21d2df27b7574a7a33c28e46e Mon Sep 17 00:00:00 2001
From: nicolo-lucchesi
Date: Sat, 30 Sep 2023 14:31:11 +0200
Subject: [PATCH 06/14] checking getBytesWithHandler returned size against
 expected one

---
 .../core/providers/coreml/model/model.mm | 26 +++++++++++--------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/onnxruntime/core/providers/coreml/model/model.mm b/onnxruntime/core/providers/coreml/model/model.mm
index 574adc20834db..a12b3a4a0465f 100644
--- a/onnxruntime/core/providers/coreml/model/model.mm
+++ b/onnxruntime/core/providers/coreml/model/model.mm
@@ -333,12 +333,14 @@ - (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
       }
 
       ORT_RETURN_IF_NOT(IsArrayContiguous(data),
-                        "Non-contiguous output MLMultiArray is not currently supported");
-      __block const void* model_output_buffer=nil;
-      [data getBytesWithHandler:^(const void *bytes, NSInteger size) {
-        model_output_buffer = bytes;
+                        "Non-contiguous output MLMultiArray is not currently supported");
+      __block const void* model_output_buffer = nil;
+      __block int64_t coreml_buffer_size = 0;
+      [data getBytesWithHandler:^(const void* bytes, NSInteger size) {
+        model_output_buffer = bytes;
+        coreml_buffer_size = size;
       }];
 
       if (model_output_buffer == nullptr) {
@@ -347,11 +349,13 @@ - (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
       switch (onnx_data_type) {
         case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: {
           const auto output_data_byte_size = num_elements * sizeof(float);
+          ORT_RETURN_IF_NOT(coreml_buffer_size == output_data_byte_size, "CoreML output buffer size and expected output size differ");
           memcpy(output_buffer, model_output_buffer, output_data_byte_size);
           break;
         }
         case ONNX_NAMESPACE::TensorProto_DataType_INT32: {
           const auto output_data_byte_size = num_elements * sizeof(int32_t);
+          ORT_RETURN_IF_NOT(coreml_buffer_size == output_data_byte_size, "CoreML output buffer size and expected output size differ");
           memcpy(output_buffer, model_output_buffer, output_data_byte_size);
           break;
         }
@@ -361,7 +365,7 @@ - (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
         case ONNX_NAMESPACE::TensorProto_DataType_INT64: {
           ORT_RETURN_IF_NOT(data.dataType == MLMultiArrayDataTypeInt32,
                             "CoreML output data type is not MLMultiArrayDataTypeInt32");
-
+          ORT_RETURN_IF_NOT(coreml_buffer_size == num_elements * sizeof(int32_t), "CoreML output buffer size and expected output size differ");
           const auto model_output_span = gsl::span{static_cast<const int32_t*>(model_output_buffer), num_elements};
           const auto output_span = gsl::span{static_cast<int64_t*>(output_buffer), num_elements};
           std::transform(model_output_span.begin(), model_output_span.end(), output_span.begin(),
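Each case above now validates the byte count reported by CoreML against `num_elements * sizeof(T)` before copying. The guard pattern in isolation (a sketch, not code from the patch):

```objc
#include <cstddef>
#include <cstring>

// Refuse to copy when the producer-reported size disagrees with the expected one,
// instead of risking an over- or under-read.
static bool SafeCopy(void* dst, const void* src, size_t expected_bytes, size_t reported_bytes) {
  if (reported_bytes != expected_bytes) {
    return false;
  }
  memcpy(dst, src, expected_bytes);
  return true;
}
```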
From 6dda63f88ab397a9878291a31c32a7ecb51aba48 Mon Sep 17 00:00:00 2001
From: nicolo-lucchesi
Date: Mon, 2 Oct 2023 21:25:48 +0200
Subject: [PATCH 07/14] 120 chars line length limit

---
 onnxruntime/core/providers/coreml/model/model.mm | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/onnxruntime/core/providers/coreml/model/model.mm b/onnxruntime/core/providers/coreml/model/model.mm
index a12b3a4a0465f..cbee7975b2525 100644
--- a/onnxruntime/core/providers/coreml/model/model.mm
+++ b/onnxruntime/core/providers/coreml/model/model.mm
@@ -349,13 +349,15 @@ - (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
       switch (onnx_data_type) {
         case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: {
           const auto output_data_byte_size = num_elements * sizeof(float);
-          ORT_RETURN_IF_NOT(coreml_buffer_size == output_data_byte_size, "CoreML output buffer size and expected output size differ");
+          ORT_RETURN_IF_NOT(coreml_buffer_size == output_data_byte_size,
+                            "CoreML output buffer size and expected output size differ");
           memcpy(output_buffer, model_output_buffer, output_data_byte_size);
           break;
         }
         case ONNX_NAMESPACE::TensorProto_DataType_INT32: {
           const auto output_data_byte_size = num_elements * sizeof(int32_t);
-          ORT_RETURN_IF_NOT(coreml_buffer_size == output_data_byte_size, "CoreML output buffer size and expected output size differ");
+          ORT_RETURN_IF_NOT(coreml_buffer_size == output_data_byte_size,
+                            "CoreML output buffer size and expected output size differ");
           memcpy(output_buffer, model_output_buffer, output_data_byte_size);
           break;
         }
@@ -365,7 +367,8 @@ - (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
         case ONNX_NAMESPACE::TensorProto_DataType_INT64: {
           ORT_RETURN_IF_NOT(data.dataType == MLMultiArrayDataTypeInt32,
                             "CoreML output data type is not MLMultiArrayDataTypeInt32");
-          ORT_RETURN_IF_NOT(coreml_buffer_size == num_elements * sizeof(int32_t), "CoreML output buffer size and expected output size differ");
+          ORT_RETURN_IF_NOT(coreml_buffer_size == num_elements * sizeof(int32_t),
+                            "CoreML output buffer size and expected output size differ");
           const auto model_output_span = gsl::span{static_cast<const int32_t*>(model_output_buffer), num_elements};
           const auto output_span = gsl::span{static_cast<int64_t*>(output_buffer), num_elements};
           std::transform(model_output_span.begin(), model_output_span.end(), output_span.begin(),

From 7b1128e81d5ed9aa02c5173852fdb8a4ec9f1147 Mon Sep 17 00:00:00 2001
From: nicolo-lucchesi
Date: Wed, 4 Oct 2023 10:47:40 +0200
Subject: [PATCH 08/14] fixed comparison types errors

---
 onnxruntime/core/providers/coreml/model/model.mm | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/onnxruntime/core/providers/coreml/model/model.mm b/onnxruntime/core/providers/coreml/model/model.mm
index cbee7975b2525..87b438e8c30ee 100644
--- a/onnxruntime/core/providers/coreml/model/model.mm
+++ b/onnxruntime/core/providers/coreml/model/model.mm
@@ -174,7 +174,7 @@ bool IsArrayContiguous(MLMultiArray* array) {
   int64_t batch_stride = [array.strides[0] longLongValue];
   const auto* shape = array.shape;
   int64_t batch_elems = 1;
-  for (int i = 1; i < [shape count]; i++) batch_elems *= [shape[i] longLongValue];
+  for (unsigned long i = 1; i < shape.count; i++) batch_elems *= [shape[i] longLongValue];
   return batch_stride == batch_elems;
 }
 }  // namespace
@@ -335,7 +335,7 @@ - (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
                         "Non-contiguous output MLMultiArray is not currently supported");
       __block const void* model_output_buffer = nil;
-      __block int64_t coreml_buffer_size = 0;
+      __block unsigned long coreml_buffer_size = 0;
       [data getBytesWithHandler:^(const void* bytes, NSInteger size) {
         model_output_buffer = bytes;
         coreml_buffer_size = size;
       }];
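The loop fix above addresses a signed/unsigned mismatch: `NSArray.count` (and so `shape.count`) is `NSUInteger`, so indexing with `int` trips `-Wsign-compare`. A minimal illustration:

```objc
#import <Foundation/Foundation.h>

int main() {
  NSArray<NSNumber*>* shape = @[ @1, @3, @224, @224 ];
  // for (int i = 1; i < shape.count; i++) {}     // warns: int compared with NSUInteger
  for (NSUInteger i = 1; i < shape.count; i++) {  // clean: index matches count's type
    (void)shape[i];
  }
  return 0;
}
```

(`unsigned long` in the patch is the underlying type of `NSUInteger` on 64-bit targets.)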
From c664a001d283640b347c51b2b89488c84abdd1fe Mon Sep 17 00:00:00 2001
From: nicolo-lucchesi
Date: Tue, 7 Nov 2023 17:59:15 +0100
Subject: [PATCH 09/14] checking getBytesWithHandler availability with
 @available

---
 .../core/providers/coreml/model/model.mm | 21 ++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/onnxruntime/core/providers/coreml/model/model.mm b/onnxruntime/core/providers/coreml/model/model.mm
index 87b438e8c30ee..4d62a216e1ec9 100644
--- a/onnxruntime/core/providers/coreml/model/model.mm
+++ b/onnxruntime/core/providers/coreml/model/model.mm
@@ -336,10 +336,17 @@ - (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
       __block const void* model_output_buffer = nil;
       __block unsigned long coreml_buffer_size = 0;
-      [data getBytesWithHandler:^(const void* bytes, NSInteger size) {
-        model_output_buffer = bytes;
-        coreml_buffer_size = size;
-      }];
+      bool skip_buffer_size_check = false;
+      if (@available(macOS 12.3, iOS 15.4, *)) {
+        [data getBytesWithHandler:^(const void* bytes, NSInteger size) {
+          model_output_buffer = bytes;
+          coreml_buffer_size = size;
+        }];
+      } else {
+        model_output_buffer = data.dataPointer;
+        // disable size check as old API does not return buffer length
+        skip_buffer_size_check = true;
+      }
 
       if (model_output_buffer == nullptr) {
         return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "model_output_buffer has no data for ", output_name);
       }
@@ -349,14 +356,14 @@ - (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
       switch (onnx_data_type) {
         case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: {
           const auto output_data_byte_size = num_elements * sizeof(float);
-          ORT_RETURN_IF_NOT(coreml_buffer_size == output_data_byte_size,
+          ORT_RETURN_IF_NOT(skip_buffer_size_check || coreml_buffer_size == output_data_byte_size,
                             "CoreML output buffer size and expected output size differ");
           memcpy(output_buffer, model_output_buffer, output_data_byte_size);
           break;
         }
         case ONNX_NAMESPACE::TensorProto_DataType_INT32: {
           const auto output_data_byte_size = num_elements * sizeof(int32_t);
-          ORT_RETURN_IF_NOT(coreml_buffer_size == output_data_byte_size,
+          ORT_RETURN_IF_NOT(skip_buffer_size_check || coreml_buffer_size == output_data_byte_size,
                             "CoreML output buffer size and expected output size differ");
           memcpy(output_buffer, model_output_buffer, output_data_byte_size);
           break;
         }
@@ -367,7 +374,7 @@ - (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
         case ONNX_NAMESPACE::TensorProto_DataType_INT64: {
           ORT_RETURN_IF_NOT(data.dataType == MLMultiArrayDataTypeInt32,
                             "CoreML output data type is not MLMultiArrayDataTypeInt32");
-          ORT_RETURN_IF_NOT(coreml_buffer_size == num_elements * sizeof(int32_t),
+          ORT_RETURN_IF_NOT(skip_buffer_size_check || coreml_buffer_size == num_elements * sizeof(int32_t),
                             "CoreML output buffer size and expected output size differ");
           const auto model_output_span = gsl::span{static_cast<const int32_t*>(model_output_buffer), num_elements};
           const auto output_span = gsl::span{static_cast<int64_t*>(output_buffer), num_elements};
           std::transform(model_output_span.begin(), model_output_span.end(), output_span.begin(),
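`if (@available(...))` is the runtime gate the compiler pairs with API availability checking, so the newer call stays guarded on older OS versions. A compact standalone sketch of the same fallback shape (function name illustrative):

```objc
#import <CoreML/CoreML.h>

// Prefer getBytesWithHandler (macOS 12.3+ / iOS 15.4+); otherwise fall back to
// the deprecated dataPointer, silencing the deprecation warning deliberately.
static void WithBytes(MLMultiArray* array, void (^body)(const void* bytes)) {
  if (@available(macOS 12.3, iOS 15.4, *)) {
    [array getBytesWithHandler:^(const void* bytes, NSInteger size) {
      body(bytes);
    }];
  } else {
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
    body(array.dataPointer);
#pragma clang diagnostic pop
  }
}
```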
From bfe9062515f5b02eed3571efc20f5cbd70cd2639 Mon Sep 17 00:00:00 2001
From: nicolo-lucchesi
Date: Fri, 10 Nov 2023 16:53:25 +0100
Subject: [PATCH 10/14] refactored tensor buffer copy into own function

---
 .../core/providers/coreml/model/model.mm | 112 ++++++++++--------
 1 file changed, 60 insertions(+), 52 deletions(-)

diff --git a/onnxruntime/core/providers/coreml/model/model.mm b/onnxruntime/core/providers/coreml/model/model.mm
index 4d62a216e1ec9..fda472abe2cb5 100644
--- a/onnxruntime/core/providers/coreml/model/model.mm
+++ b/onnxruntime/core/providers/coreml/model/model.mm
@@ -23,6 +23,8 @@
 #include "core/providers/coreml/model/host_utils.h"
 #include "core/providers/coreml/shape_utils.h"
 
+#define HAS_GET_BYTES_WITH_HANDLER_API @available(macOS 12.3, iOS 15.4, *)
+
 // force the linker to create a dependency on the CoreML framework so that in MAUI usage we don't need
 // to manually do this
 asm(".linker_option \"-framework\", \"CoreML\"");
@@ -177,6 +179,52 @@ bool IsArrayContiguous(MLMultiArray* array) {
   for (unsigned long i = 1; i < shape.count; i++) batch_elems *= [shape[i] longLongValue];
   return batch_stride == batch_elems;
 }
+
+onnxruntime::common::Status copyMLMultiArrayBuffer(const void* mlmultiarray_buffer, void* tensor_buffer,
+                                                   MLMultiArray* array_info, const OnnxTensorInfo& tensor_info,
+                                                   bool skip_buffer_size_check = true,
+                                                   const unsigned long mlmultiarray_buffer_size = 0) {
+  if (mlmultiarray_buffer == nullptr) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "mlmultiarray_buffer has no data");
+  }
+
+  const size_t num_elements = array_info.count;
+  const auto onnx_data_type = tensor_info.data_type;
+  switch (onnx_data_type) {
+    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: {
+      const auto output_data_byte_size = num_elements * sizeof(float);
+      ORT_RETURN_IF_NOT(skip_buffer_size_check || mlmultiarray_buffer_size == output_data_byte_size,
+                        "CoreML output buffer size and expected output size differ");
+      memcpy(tensor_buffer, mlmultiarray_buffer, output_data_byte_size);
+      break;
+    }
+    case ONNX_NAMESPACE::TensorProto_DataType_INT32: {
+      const auto output_data_byte_size = num_elements * sizeof(int32_t);
+      ORT_RETURN_IF_NOT(skip_buffer_size_check || mlmultiarray_buffer_size == output_data_byte_size,
+                        "CoreML output buffer size and expected output size differ");
+      memcpy(tensor_buffer, mlmultiarray_buffer, output_data_byte_size);
+      break;
+    }
+    // For this case, since Coreml Spec only uses int32 for model output while onnx provides
+    // int64 for model output data type. We are doing a type casting (int32 -> int64) here
+    // when copying the model to ORT
+    case ONNX_NAMESPACE::TensorProto_DataType_INT64: {
+      ORT_RETURN_IF_NOT(array_info.dataType == MLMultiArrayDataTypeInt32,
+                        "CoreML output data type is not MLMultiArrayDataTypeInt32");
+      ORT_RETURN_IF_NOT(skip_buffer_size_check || mlmultiarray_buffer_size == num_elements * sizeof(int32_t),
+                        "CoreML output buffer size and expected output size differ");
+      const auto model_output_span = gsl::span{static_cast<const int32_t*>(mlmultiarray_buffer), num_elements};
+      const auto output_span = gsl::span{static_cast<int64_t*>(tensor_buffer), num_elements};
+      std::transform(model_output_span.begin(), model_output_span.end(), output_span.begin(),
+                     [](int32_t v) { return static_cast<int64_t>(v); });
+      break;
+    }
+    default:
+      return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
+                             "Output data type is not supported, actual type: ", onnx_data_type);
+  }
+  return onnxruntime::common::Status::OK();
+}
 }  // namespace
 
 NS_ASSUME_NONNULL_BEGIN
@@ -306,9 +354,9 @@ - (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
     return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "output_features has no value for ", output_name);
   }
 
-  auto* data = [output_value multiArrayValue];
+  __block MLMultiArray* data = [output_value multiArrayValue];
 
-  const auto coreml_static_output_shape = [&]() {
+  const auto coreml_static_output_shape = [](MLMultiArray* data) {
     InlinedVector<int64_t> result;
     result.reserve(data.shape.count);
     for (NSNumber* dim in data.shape) {
@@ -316,13 +364,13 @@ - (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
       result.push_back(dim_value);
     }
     return result;
-  }();
+  }(data);
 
   const auto static_output_shape = GetStaticOutputShape(output_tensor_info.shape, coreml_static_output_shape,
                                                         *logger_);
 
-  void* output_buffer = get_output_tensor_mutable_raw_data_fn(output_name, output_tensor_info.data_type,
-                                                              static_output_shape);
+  __block void* output_buffer = get_output_tensor_mutable_raw_data_fn(output_name, output_tensor_info.data_type,
+                                                                      static_output_shape);
 
   if (const size_t num_elements = data.count; num_elements > 0) {
     if (const auto shape_size = ShapeSize(static_output_shape);
@@ -334,58 +382,18 @@ - (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
       ORT_RETURN_IF_NOT(IsArrayContiguous(data),
                         "Non-contiguous output MLMultiArray is not currently supported");
-      __block const void* model_output_buffer = nil;
-      __block unsigned long coreml_buffer_size = 0;
-      bool skip_buffer_size_check = false;
-      if (@available(macOS 12.3, iOS 15.4, *)) {
+      __block Status output_status;
+      __block const auto tensor_info = output_tensor_info;
+      if (HAS_GET_BYTES_WITH_HANDLER_API) {
         [data getBytesWithHandler:^(const void* bytes, NSInteger size) {
-          model_output_buffer = bytes;
-          coreml_buffer_size = size;
+          output_status = copyMLMultiArrayBuffer(bytes, output_buffer, data, tensor_info, true, size);
         }];
       } else {
-        model_output_buffer = data.dataPointer;
         // disable size check as old API does not return buffer length
-        skip_buffer_size_check = true;
-      }
-
-      if (model_output_buffer == nullptr) {
-        return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "model_output_buffer has no data for ", output_name);
-      }
-
-      const auto onnx_data_type = output_tensor_info.data_type;
-      switch (onnx_data_type) {
-        case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: {
-          const auto output_data_byte_size = num_elements * sizeof(float);
-          ORT_RETURN_IF_NOT(skip_buffer_size_check || coreml_buffer_size == output_data_byte_size,
-                            "CoreML output buffer size and expected output size differ");
-          memcpy(output_buffer, model_output_buffer, output_data_byte_size);
-          break;
-        }
-        case ONNX_NAMESPACE::TensorProto_DataType_INT32: {
-          const auto output_data_byte_size = num_elements * sizeof(int32_t);
-          ORT_RETURN_IF_NOT(skip_buffer_size_check || coreml_buffer_size == output_data_byte_size,
-                            "CoreML output buffer size and expected output size differ");
-          memcpy(output_buffer, model_output_buffer, output_data_byte_size);
-          break;
-        }
-        // For this case, since Coreml Spec only uses int32 for model output while onnx provides
-        // int64 for model output data type. We are doing a type casting (int32 -> int64) here
-        // when copying the model to ORT
-        case ONNX_NAMESPACE::TensorProto_DataType_INT64: {
-          ORT_RETURN_IF_NOT(data.dataType == MLMultiArrayDataTypeInt32,
-                            "CoreML output data type is not MLMultiArrayDataTypeInt32");
-          ORT_RETURN_IF_NOT(skip_buffer_size_check || coreml_buffer_size == num_elements * sizeof(int32_t),
-                            "CoreML output buffer size and expected output size differ");
-          const auto model_output_span = gsl::span{static_cast<const int32_t*>(model_output_buffer), num_elements};
-          const auto output_span = gsl::span{static_cast<int64_t*>(output_buffer), num_elements};
-          std::transform(model_output_span.begin(), model_output_span.end(), output_span.begin(),
-                         [](int32_t v) { return static_cast<int64_t>(v); });
-          break;
-        }
-        default:
-          return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
-                                 "Output data type is not supported, actual type: ", onnx_data_type);
+        output_status = copyMLMultiArrayBuffer(data.dataPointer, output_buffer, data, tensor_info, false);
       }
+      if (!output_status.IsOK())
+        return output_status;
     }
   }
 }
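Besides removing duplication, the refactor moves the copy into the `getBytesWithHandler:` block itself, so the CoreML buffer is only touched while the handler guarantees the pointer is valid; only the resulting `Status` escapes via `__block`. The shape of that pattern, reduced to a sketch (function name illustrative):

```objc
#import <CoreML/CoreML.h>
#include <cstring>

// Do all work on `bytes` inside the handler; let only the result escape.
static bool CopyFirstBytes(MLMultiArray* array, void* dst, size_t n) {
  __block bool ok = false;
  [array getBytesWithHandler:^(const void* bytes, NSInteger size) {
    if ((size_t)size >= n) {
      memcpy(dst, bytes, n);  // pointer used only within its guaranteed lifetime
      ok = true;
    }
  }];
  return ok;
}
```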
From f29e5ae5429dcf595d65dc00faafb902d689f4a3 Mon Sep 17 00:00:00 2001
From: nicolo-lucchesi
Date: Mon, 13 Nov 2023 10:35:02 +0100
Subject: [PATCH 11/14] fixed variable qualifiers and naming

---
 .../core/providers/coreml/model/model.mm | 40 +++++++++----------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/onnxruntime/core/providers/coreml/model/model.mm b/onnxruntime/core/providers/coreml/model/model.mm
index fda472abe2cb5..85ecee626723d 100644
--- a/onnxruntime/core/providers/coreml/model/model.mm
+++ b/onnxruntime/core/providers/coreml/model/model.mm
@@ -10,6 +10,7 @@
 #include <cstdint>
 #include <unordered_map>
 #include <vector>
+#include <optional>
 
 #include "core/common/common.h"
 #include "core/common/gsl.h"
@@ -23,8 +23,6 @@
 #include "core/providers/coreml/model/host_utils.h"
 #include "core/providers/coreml/shape_utils.h"
 
-#define HAS_GET_BYTES_WITH_HANDLER_API @available(macOS 12.3, iOS 15.4, *)
-
 // force the linker to create a dependency on the CoreML framework so that in MAUI usage we don't need
 // to manually do this
 asm(".linker_option \"-framework\", \"CoreML\"");
@@ -180,10 +179,10 @@ bool IsArrayContiguous(MLMultiArray* array) {
   return batch_stride == batch_elems;
 }
 
-onnxruntime::common::Status copyMLMultiArrayBuffer(const void* mlmultiarray_buffer, void* tensor_buffer,
-                                                   MLMultiArray* array_info, const OnnxTensorInfo& tensor_info,
-                                                   bool skip_buffer_size_check = true,
-                                                   const unsigned long mlmultiarray_buffer_size = 0) {
+Status CopyMLMultiArrayBuffer(const void* mlmultiarray_buffer, void* tensor_buffer,
+                              const MLMultiArray* array_info,
+                              const OnnxTensorInfo& tensor_info,
+                              const std::optional<unsigned long> mlmultiarray_buffer_size) {
   if (mlmultiarray_buffer == nullptr) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "mlmultiarray_buffer has no data");
   }
@@ -193,14 +192,14 @@ bool IsArrayContiguous(MLMultiArray* array) {
   switch (onnx_data_type) {
     case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: {
       const auto output_data_byte_size = num_elements * sizeof(float);
-      ORT_RETURN_IF_NOT(skip_buffer_size_check || mlmultiarray_buffer_size == output_data_byte_size,
+      ORT_RETURN_IF_NOT(!mlmultiarray_buffer_size || mlmultiarray_buffer_size == output_data_byte_size,
                         "CoreML output buffer size and expected output size differ");
       memcpy(tensor_buffer, mlmultiarray_buffer, output_data_byte_size);
       break;
     }
     case ONNX_NAMESPACE::TensorProto_DataType_INT32: {
       const auto output_data_byte_size = num_elements * sizeof(int32_t);
-      ORT_RETURN_IF_NOT(skip_buffer_size_check || mlmultiarray_buffer_size == output_data_byte_size,
+      ORT_RETURN_IF_NOT(!mlmultiarray_buffer_size || mlmultiarray_buffer_size == output_data_byte_size,
                         "CoreML output buffer size and expected output size differ");
       memcpy(tensor_buffer, mlmultiarray_buffer, output_data_byte_size);
       break;
@@ -211,7 +210,7 @@ bool IsArrayContiguous(MLMultiArray* array) {
     case ONNX_NAMESPACE::TensorProto_DataType_INT64: {
       ORT_RETURN_IF_NOT(array_info.dataType == MLMultiArrayDataTypeInt32,
                         "CoreML output data type is not MLMultiArrayDataTypeInt32");
-      ORT_RETURN_IF_NOT(skip_buffer_size_check || mlmultiarray_buffer_size == num_elements * sizeof(int32_t),
+      ORT_RETURN_IF_NOT(!mlmultiarray_buffer_size || mlmultiarray_buffer_size == num_elements * sizeof(int32_t),
                         "CoreML output buffer size and expected output size differ");
       const auto model_output_span = gsl::span{static_cast<const int32_t*>(mlmultiarray_buffer), num_elements};
       const auto output_span = gsl::span{static_cast<int64_t*>(tensor_buffer), num_elements};
@@ -223,7 +222,7 @@ bool IsArrayContiguous(MLMultiArray* array) {
     default:
       return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
                              "Output data type is not supported, actual type: ", onnx_data_type);
   }
-  return onnxruntime::common::Status::OK();
+  return Status::OK();
 }
 }  // namespace
@@ -354,9 +353,9 @@ - (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
     return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "output_features has no value for ", output_name);
   }
 
-  __block MLMultiArray* data = [output_value multiArrayValue];
+  MLMultiArray* data = [output_value multiArrayValue];
 
-  const auto coreml_static_output_shape = [](MLMultiArray* data) {
+  const auto coreml_static_output_shape = [&data]() {
     InlinedVector<int64_t> result;
     result.reserve(data.shape.count);
     for (NSNumber* dim in data.shape) {
@@ -364,7 +363,7 @@ - (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
       result.push_back(dim_value);
     }
     return result;
-  }(data);
+  }();
 
   const auto static_output_shape = GetStaticOutputShape(output_tensor_info.shape, coreml_static_output_shape,
                                                         *logger_);
@@ -382,18 +381,19 @@ - (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
       ORT_RETURN_IF_NOT(IsArrayContiguous(data),
                         "Non-contiguous output MLMultiArray is not currently supported");
-      __block Status output_status;
-      __block const auto tensor_info = output_tensor_info;
-      if (HAS_GET_BYTES_WITH_HANDLER_API) {
+      __block Status copy_status;
+      const auto tensor_info = output_tensor_info;
+      // `getBytesWithHandler` replaces deprecated `.dataPointer` on new versions
+      if (@available(macOS 12.3, iOS 15.4, *)) {
         [data getBytesWithHandler:^(const void* bytes, NSInteger size) {
-          output_status = copyMLMultiArrayBuffer(bytes, output_buffer, data, tensor_info, true, size);
+          copy_status = CopyMLMultiArrayBuffer(bytes, output_buffer, data, tensor_info, size);
         }];
       } else {
         // disable size check as old API does not return buffer length
-        output_status = copyMLMultiArrayBuffer(data.dataPointer, output_buffer, data, tensor_info, false);
+        copy_status = CopyMLMultiArrayBuffer(data.dataPointer, output_buffer, data, tensor_info, std::nullopt);
       }
-      if (!output_status.IsOK())
-        return output_status;
+      if (!copy_status.IsOK())
+        return copy_status;
     }
   }
 }
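Swapping the `skip_buffer_size_check` flag plus a default `0` size for `std::optional<unsigned long>` makes "no size available" a distinct state rather than a magic value. The check in isolation (a sketch mirroring the patch's condition):

```objc
#include <cstddef>
#include <optional>

// Absent optional => size unknown, skip validation; present => must match exactly.
static bool SizeOk(size_t expected, std::optional<unsigned long> reported) {
  return !reported || *reported == expected;
}
// SizeOk(16, std::nullopt) -> true, SizeOk(16, 16ul) -> true, SizeOk(16, 8ul) -> false
```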
From a78ef97cb8bfbf99073c027d7e0d425bacabd82a Mon Sep 17 00:00:00 2001
From: nicolo-lucchesi
Date: Wed, 15 Nov 2023 09:21:52 +0100
Subject: [PATCH 12/14] better var qualifiers

---
 onnxruntime/core/providers/coreml/model/model.mm | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/onnxruntime/core/providers/coreml/model/model.mm b/onnxruntime/core/providers/coreml/model/model.mm
index 85ecee626723d..80b5dd46779a6 100644
--- a/onnxruntime/core/providers/coreml/model/model.mm
+++ b/onnxruntime/core/providers/coreml/model/model.mm
@@ -8,9 +8,9 @@
 #include <algorithm>
 #include <cstdint>
+#include <optional>
 #include <unordered_map>
 #include <vector>
-#include <optional>
 
 #include "core/common/common.h"
 #include "core/common/gsl.h"
@@ -171,7 +171,7 @@ Status CreateInputFeatureProvider(const std::unordered_map<std::string, OnnxTen
 }
 
-bool IsArrayContiguous(MLMultiArray* array) {
+bool IsArrayContiguous(const MLMultiArray* array) {
   int64_t batch_stride = [array.strides[0] longLongValue];
   const auto* shape = array.shape;
   int64_t batch_elems = 1;
@@ -355,6 +355,6 @@ - (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
   MLMultiArray* data = [output_value multiArrayValue];
 
-  const auto coreml_static_output_shape = [&data]() {
+  const auto coreml_static_output_shape = [data]() {
     InlinedVector<int64_t> result;
     result.reserve(data.shape.count);
     for (NSNumber* dim in data.shape) {
@@ -368,8 +368,8 @@ - (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
   const auto static_output_shape = GetStaticOutputShape(output_tensor_info.shape, coreml_static_output_shape,
                                                         *logger_);
 
-  __block void* output_buffer = get_output_tensor_mutable_raw_data_fn(output_name, output_tensor_info.data_type,
-                                                                      static_output_shape);
+  void* output_buffer = get_output_tensor_mutable_raw_data_fn(output_name, output_tensor_info.data_type,
+                                                              static_output_shape);
 
   if (const size_t num_elements = data.count; num_elements > 0) {
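The qualifier cleanup relies on how Objective-C blocks capture: locals are captured by value (a const copy), and `__block` is only needed when the block must write to the variable. Hence the plain `[data]` capture, and no `__block` on `output_buffer` once nothing inside a block assigns it. An illustration:

```objc
#import <Foundation/Foundation.h>

int main() {
  NSArray<NSNumber*>* nums = @[ @1, @2, @3 ];  // only read in the block: plain capture
  __block NSInteger sum = 0;                   // written in the block: needs __block
  void (^add)(void) = ^{
    for (NSNumber* n in nums) {
      sum += n.integerValue;
    }
  };
  add();
  NSLog(@"%ld", (long)sum);
  return 0;
}
```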
From f88132d8a66dc1ae82af6fabebaf1adcc5ddf959 Mon Sep 17 00:00:00 2001
From: nicolo-lucchesi
Date: Thu, 16 Nov 2023 13:48:11 +0100
Subject: [PATCH 13/14] changed tensor_info to pointer to avoid a shape vector
 copy

---
 onnxruntime/core/providers/coreml/model/model.mm | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/onnxruntime/core/providers/coreml/model/model.mm b/onnxruntime/core/providers/coreml/model/model.mm
index 80b5dd46779a6..543b1b15da1e0 100644
--- a/onnxruntime/core/providers/coreml/model/model.mm
+++ b/onnxruntime/core/providers/coreml/model/model.mm
@@ -181,14 +181,14 @@ bool IsArrayContiguous(const MLMultiArray* array) {
 
 Status CopyMLMultiArrayBuffer(const void* mlmultiarray_buffer, void* tensor_buffer,
                               const MLMultiArray* array_info,
-                              const OnnxTensorInfo& tensor_info,
+                              const OnnxTensorInfo* tensor_info,
                               const std::optional<unsigned long> mlmultiarray_buffer_size) {
   if (mlmultiarray_buffer == nullptr) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "mlmultiarray_buffer has no data");
   }
 
   const size_t num_elements = array_info.count;
-  const auto onnx_data_type = tensor_info.data_type;
+  const auto onnx_data_type = tensor_info->data_type;
   switch (onnx_data_type) {
     case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: {
       const auto output_data_byte_size = num_elements * sizeof(float);
@@ -382,7 +382,7 @@ - (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
       ORT_RETURN_IF_NOT(IsArrayContiguous(data),
                         "Non-contiguous output MLMultiArray is not currently supported");
       __block Status copy_status;
-      const auto tensor_info = output_tensor_info;
+      const auto *tensor_info = &output_tensor_info;
       // `getBytesWithHandler` replaces deprecated `.dataPointer` on new versions
       if (@available(macOS 12.3, iOS 15.4, *)) {
         [data getBytesWithHandler:^(const void* bytes, NSInteger size) {

From dc82dfb56f624b10187303087eb2a58027e0463c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=B2=20Lucchesi?=
Date: Fri, 17 Nov 2023 20:21:58 +0100
Subject: [PATCH 14/14] formatting

Co-authored-by: Edward Chen <18449977+edgchen1@users.noreply.github.com>
---
 onnxruntime/core/providers/coreml/model/model.mm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnxruntime/core/providers/coreml/model/model.mm b/onnxruntime/core/providers/coreml/model/model.mm
index 543b1b15da1e0..4a6743e9e5c52 100644
--- a/onnxruntime/core/providers/coreml/model/model.mm
+++ b/onnxruntime/core/providers/coreml/model/model.mm
@@ -382,7 +382,7 @@ - (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
       ORT_RETURN_IF_NOT(IsArrayContiguous(data),
                         "Non-contiguous output MLMultiArray is not currently supported");
       __block Status copy_status;
-      const auto *tensor_info = &output_tensor_info;
+      const auto* tensor_info = &output_tensor_info;
       // `getBytesWithHandler` replaces deprecated `.dataPointer` on new versions
       if (@available(macOS 12.3, iOS 15.4, *)) {
         [data getBytesWithHandler:^(const void* bytes, NSInteger size) {
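On the pointer change in PATCH 13: capturing `output_tensor_info` by value copies the whole struct, including its `shape` vector, into the block; capturing a pointer copies one machine word. A hedged sketch (the struct is a stand-in; the real `OnnxTensorInfo` has at least `data_type` and `shape` per the code above):

```objc
#include <cstdint>
#include <vector>

struct TensorInfoLike {        // stand-in for OnnxTensorInfo
  std::vector<int64_t> shape;  // copying this allocates and copies every element
  int32_t data_type;
};

static int32_t TypeOf(const TensorInfoLike& info) {
  const auto* tensor_info = &info;    // 8-byte pointer copy
  __block int32_t result = 0;
  void (^read)(void) = ^{
    result = tensor_info->data_type;  // block captures the pointer, not the struct
  };
  read();
  return result;
}
```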