From 92d5f7c68d89f30f0465e381c686ab11bf14e788 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=B2=20Lucchesi?= <nicolo.lucchesi@gmail.com>
Date: Sat, 18 Nov 2023 02:58:49 +0100
Subject: [PATCH] [CoreML] Adapt to `MLMultiArray.dataPointer` deprecation
 (#17726)

### Description
This PR addresses https://github.com/microsoft/onnxruntime/issues/17652.
The deprecated `MLMultiArray.dataPointer` is replaced with
`.getBytesWithHandler`, as suggested by the docs.
For now, I am only checking that the output `MLMultiArray` is
contiguous, returning unsupported operation when that is not the case.
I think this is already better than what we have right now, so we can
block unsafe calls to `.dataPointer` (if any..).

I would be happy to implement the handling of the non-contiguous case
(replacing `memcpy` for such cases) as suggested by @edgchen1, but I am
not sure how to reproduce that case to add a corresponding unit-test.
Would we have to define a custom `MLCustomLayer` to get a non-contiguous
output from a model..?

### Motivation and Context
Fix https://github.com/microsoft/onnxruntime/issues/17652.

---------

Co-authored-by: nicolo-lucchesi <nicolo.lucchesi@hexagon.com>
Co-authored-by: Edward Chen <18449977+edgchen1@users.noreply.github.com>
---
 .../core/providers/coreml/model/model.mm      | 107 ++++++++++++------
 1 file changed, 71 insertions(+), 36 deletions(-)
diff --git a/onnxruntime/core/providers/coreml/model/model.mm b/onnxruntime/core/providers/coreml/model/model.mm
index 60e0b1c061a43..4a6743e9e5c52 100644
--- a/onnxruntime/core/providers/coreml/model/model.mm
+++ b/onnxruntime/core/providers/coreml/model/model.mm
@@ -8,6 +8,7 @@
 
 #include <algorithm>
 #include <cstdint>
+#include <optional>
 #include <unordered_map>
 #include <vector>
 
@@ -169,6 +170,60 @@ Status CreateInputFeatureProvider(const std::unordered_map<std::string, OnnxTens
   conversion_buffers_out = std::move(conversion_buffers);
   return Status::OK();
 }
+
+bool IsArrayContiguous(const MLMultiArray* array) {
+  int64_t batch_stride = [array.strides[0] longLongValue];
+  const auto* shape = array.shape;
+  int64_t batch_elems = 1;
+  for (unsigned long i = 1; i < shape.count; i++) batch_elems *= [shape[i] longLongValue];
+  return batch_stride == batch_elems;
+}
+
+Status CopyMLMultiArrayBuffer(const void* mlmultiarray_buffer, void* tensor_buffer,
+                              const MLMultiArray* array_info,
+                              const OnnxTensorInfo* tensor_info,
+                              const std::optional<unsigned long> mlmultiarray_buffer_size) {
+  if (mlmultiarray_buffer == nullptr) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "mlmultiarray_buffer has no data");
+  }
+
+  const size_t num_elements = array_info.count;
+  const auto onnx_data_type = tensor_info->data_type;
+  switch (onnx_data_type) {
+    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: {
+      const auto output_data_byte_size = num_elements * sizeof(float);
+      ORT_RETURN_IF_NOT(!mlmultiarray_buffer_size || mlmultiarray_buffer_size == output_data_byte_size,
+                        "CoreML output buffer size and expected output size differ");
+      memcpy(tensor_buffer, mlmultiarray_buffer, output_data_byte_size);
+      break;
+    }
+    case ONNX_NAMESPACE::TensorProto_DataType_INT32: {
+      const auto output_data_byte_size = num_elements * sizeof(int32_t);
+      ORT_RETURN_IF_NOT(!mlmultiarray_buffer_size || mlmultiarray_buffer_size == output_data_byte_size,
+                        "CoreML output buffer size and expected output size differ");
+      memcpy(tensor_buffer, mlmultiarray_buffer, output_data_byte_size);
+      break;
+    }
+    // For this case, since Coreml Spec only uses int32 for model output while onnx provides
+    // int64 for model output data type. We are doing a type casting (int32 -> int64) here
+    // when copying the model to ORT
+    case ONNX_NAMESPACE::TensorProto_DataType_INT64: {
+      ORT_RETURN_IF_NOT(array_info.dataType == MLMultiArrayDataTypeInt32,
+                        "CoreML output data type is not MLMultiArrayDataTypeInt32");
+      ORT_RETURN_IF_NOT(!mlmultiarray_buffer_size || mlmultiarray_buffer_size == num_elements * sizeof(int32_t),
+                        "CoreML output buffer size and expected output size differ");
+      const auto model_output_span = gsl::span{static_cast<const int32_t*>(mlmultiarray_buffer), num_elements};
+      const auto output_span = gsl::span{static_cast<int64_t*>(tensor_buffer), num_elements};
+      std::transform(model_output_span.begin(), model_output_span.end(), output_span.begin(),
+                     [](int32_t v) { return static_cast<int64_t>(v); });
+      break;
+    }
+    default:
+      return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
+                             "Output data type is not supported, actual type: ", onnx_data_type);
+  }
+  return Status::OK();
+}
 }  // namespace
 
 NS_ASSUME_NONNULL_BEGIN
@@ -298,9 +353,9 @@ - (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
         return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "output_features has no value for ", output_name);
       }
 
-      auto* data = [output_value multiArrayValue];
+      MLMultiArray* data = [output_value multiArrayValue];
 
-      const auto coreml_static_output_shape = [&]() {
+      const auto coreml_static_output_shape = [data]() {
         InlinedVector<int64_t> result;
         result.reserve(data.shape.count);
         for (NSNumber* dim in data.shape) {
@@ -324,41 +379,21 @@ - (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
                                  ") do not match");
         }
 
-        const void* model_output_buffer = data.dataPointer;
-
-        if (model_output_buffer == nullptr) {
-          return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "model_output_buffer has no data for ", output_name);
-        }
-
-        const auto onnx_data_type = output_tensor_info.data_type;
-        switch (onnx_data_type) {
-          case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: {
-            const auto output_data_byte_size = num_elements * sizeof(float);
-            memcpy(output_buffer, model_output_buffer, output_data_byte_size);
-            break;
-          }
-          case ONNX_NAMESPACE::TensorProto_DataType_INT32: {
-            const auto output_data_byte_size = num_elements * sizeof(int32_t);
-            memcpy(output_buffer, model_output_buffer, output_data_byte_size);
-            break;
-          }
-          // For this case, since Coreml Spec only uses int32 for model output while onnx provides
-          // int64 for model output data type. We are doing a type casting (int32 -> int64) here
-          // when copying the model to ORT
-          case ONNX_NAMESPACE::TensorProto_DataType_INT64: {
-            ORT_RETURN_IF_NOT(data.dataType == MLMultiArrayDataTypeInt32,
-                              "CoreML output data type is not MLMultiArrayDataTypeInt32");
-
-            const auto model_output_span = gsl::span{static_cast<const int32_t*>(model_output_buffer), num_elements};
-            const auto output_span = gsl::span{static_cast<int64_t*>(output_buffer), num_elements};
-            std::transform(model_output_span.begin(), model_output_span.end(), output_span.begin(),
-                           [](int32_t v) { return static_cast<int64_t>(v); });
-            break;
-          }
-          default:
-            return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
-                                   "Output data type is not supported, actual type: ", onnx_data_type);
+        ORT_RETURN_IF_NOT(IsArrayContiguous(data),
+                          "Non-contiguous output MLMultiArray is not currently supported");
+        __block Status copy_status;
+        const auto* tensor_info = &output_tensor_info;
+        // `getBytesWithHandler` replaces deprecated `.dataPointer` on new versions
+        if (@available(macOS 12.3, iOS 15.4, *)) {
+          [data getBytesWithHandler:^(const void* bytes, NSInteger size) {
+            copy_status = CopyMLMultiArrayBuffer(bytes, output_buffer, data, tensor_info, size);
+          }];
+        } else {
+          // disable size check as old API does not return buffer length
+          copy_status = CopyMLMultiArrayBuffer(data.dataPointer, output_buffer, data, tensor_info, std::nullopt);
         }
+        if (!copy_status.IsOK())
+          return copy_status;
       }
     }
   }