From 20a45dd67b773018d292befad789c717ae46615d Mon Sep 17 00:00:00 2001 From: wejoncy Date: Tue, 15 Oct 2024 11:50:11 +0800 Subject: [PATCH] [CoreML ML Program] support accelerators selector (#22383) ### Description For now, the CoreML EP only supports running ML models with the CPU_ONLY or ALL compute units. However, CPU_AND_GPU is sometimes a lot faster. This PR adds an option to select which hardware to use in order to boost performance. ### Motivation and Context --------- Co-authored-by: Edward Chen <18449977+edgchen1@users.noreply.github.com> --- .../ProviderOptions.shared.cs | 3 ++- .../core/providers/coreml/coreml_provider_factory.h | 7 ++++++- .../java/ai/onnxruntime/providers/CoreMLFlags.java | 4 +++- js/common/lib/inference-session.ts | 2 ++ js/react_native/ios/OnnxruntimeModule.mm | 2 ++ objectivec/include/ort_coreml_execution_provider.h | 5 ++++- objectivec/ort_coreml_execution_provider.mm | 1 + .../providers/coreml/coreml_execution_provider.cc | 8 ++++++++ onnxruntime/core/providers/coreml/model/model.mm | 12 +++++++++--- onnxruntime/python/onnxruntime_pybind_state.cc | 2 ++ onnxruntime/test/perftest/command_args_parser.cc | 2 +- onnxruntime/test/perftest/ort_test_session.cc | 6 ++++++ 12 files changed, 46 insertions(+), 8 deletions(-) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.shared.cs b/csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.shared.cs index b04f7886b76dd..1b9cd7572170b 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.shared.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.shared.cs @@ -330,7 +330,8 @@ public enum CoreMLFlags : uint COREML_FLAG_ONLY_ENABLE_DEVICE_WITH_ANE = 0x004, COREML_FLAG_ONLY_ALLOW_STATIC_INPUT_SHAPES = 0x008, COREML_FLAG_CREATE_MLPROGRAM = 0x010, - COREML_FLAG_LAST = COREML_FLAG_CREATE_MLPROGRAM, + COREML_FLAG_USE_CPU_AND_GPU = 0x020, + COREML_FLAG_LAST = COREML_FLAG_USE_CPU_AND_GPU, } /// diff --git a/include/onnxruntime/core/providers/coreml/coreml_provider_factory.h 
b/include/onnxruntime/core/providers/coreml/coreml_provider_factory.h index 55abb90b981f5..7a6ba3afddce7 100644 --- a/include/onnxruntime/core/providers/coreml/coreml_provider_factory.h +++ b/include/onnxruntime/core/providers/coreml/coreml_provider_factory.h @@ -31,9 +31,14 @@ enum COREMLFlags { // Create an MLProgram. By default it will create a NeuralNetwork model. Requires Core ML 5 or later. COREML_FLAG_CREATE_MLPROGRAM = 0x010, + // Exclude the ANE, as it sometimes decreases performance + // https://developer.apple.com/documentation/coreml/mlcomputeunits?language=objc + // there are four compute units: + // MLComputeUnitsCPUAndNeuralEngine|MLComputeUnitsCPUAndGPU|MLComputeUnitsCPUOnly|MLComputeUnitsAll + COREML_FLAG_USE_CPU_AND_GPU = 0x020, // Keep COREML_FLAG_LAST at the end of the enum definition // And assign the last COREMLFlag to it - COREML_FLAG_LAST = COREML_FLAG_CREATE_MLPROGRAM, + COREML_FLAG_LAST = COREML_FLAG_USE_CPU_AND_GPU, }; #ifdef __cplusplus diff --git a/java/src/main/java/ai/onnxruntime/providers/CoreMLFlags.java b/java/src/main/java/ai/onnxruntime/providers/CoreMLFlags.java index cec3fadf446ca..22bf940844774 100644 --- a/java/src/main/java/ai/onnxruntime/providers/CoreMLFlags.java +++ b/java/src/main/java/ai/onnxruntime/providers/CoreMLFlags.java @@ -25,7 +25,9 @@ public enum CoreMLFlags implements OrtFlags { * Create an MLProgram. By default it will create a NeuralNetwork model. Requires Core ML 5 or * later. */ - CREATE_MLPROGRAM(16); // COREML_FLAG_CREATE_MLPROGRAM(0x010) + CREATE_MLPROGRAM(16), // COREML_FLAG_CREATE_MLPROGRAM(0x010) + /** exclude ANE */ + CPU_AND_GPU(32); // COREML_FLAG_USE_CPU_AND_GPU(0x020) /** The native value of the enum. 
*/ public final int value; diff --git a/js/common/lib/inference-session.ts b/js/common/lib/inference-session.ts index af8a8c76c8fe4..547db029471a2 100644 --- a/js/common/lib/inference-session.ts +++ b/js/common/lib/inference-session.ts @@ -320,6 +320,7 @@ export declare namespace InferenceSession { * COREML_FLAG_ONLY_ENABLE_DEVICE_WITH_ANE = 0x004 * COREML_FLAG_ONLY_ALLOW_STATIC_INPUT_SHAPES = 0x008 * COREML_FLAG_CREATE_MLPROGRAM = 0x010 + * COREML_FLAG_USE_CPU_AND_GPU = 0x020 * ``` * * See include/onnxruntime/core/providers/coreml/coreml_provider_factory.h for more details. @@ -333,6 +334,7 @@ export declare namespace InferenceSession { * This setting is available only in ONNXRuntime (react-native). */ useCPUOnly?: boolean; + useCPUAndGPU?: boolean; /** * Specify whether to enable CoreML EP on subgraph. * diff --git a/js/react_native/ios/OnnxruntimeModule.mm b/js/react_native/ios/OnnxruntimeModule.mm index 9da76034fc1ad..16e64d8ed98b4 100644 --- a/js/react_native/ios/OnnxruntimeModule.mm +++ b/js/react_native/ios/OnnxruntimeModule.mm @@ -389,6 +389,8 @@ - (NSDictionary*)run:(NSString*)url if (useOptions) { if ([[executionProvider objectForKey:@"useCPUOnly"] boolValue]) { coreml_flags |= COREML_FLAG_USE_CPU_ONLY; + } else if ([[executionProvider objectForKey:@"useCPUAndGPU"] boolValue]) { + coreml_flags |= COREML_FLAG_USE_CPU_AND_GPU; } if ([[executionProvider objectForKey:@"enableOnSubgraph"] boolValue]) { coreml_flags |= COREML_FLAG_ENABLE_ON_SUBGRAPH; diff --git a/objectivec/include/ort_coreml_execution_provider.h b/objectivec/include/ort_coreml_execution_provider.h index 6ff18176ebeb2..d7d873f5eb0e0 100644 --- a/objectivec/include/ort_coreml_execution_provider.h +++ b/objectivec/include/ort_coreml_execution_provider.h @@ -29,7 +29,10 @@ NS_ASSUME_NONNULL_BEGIN * Whether the CoreML execution provider should run on CPU only. */ @property BOOL useCPUOnly; - +/** + * exclude ANE in CoreML. 
+ */ +@property BOOL useCPUAndGPU; /** * Whether the CoreML execution provider is enabled on subgraphs. */ diff --git a/objectivec/ort_coreml_execution_provider.mm b/objectivec/ort_coreml_execution_provider.mm index 58b47d68eea63..6cb5026b93521 100644 --- a/objectivec/ort_coreml_execution_provider.mm +++ b/objectivec/ort_coreml_execution_provider.mm @@ -25,6 +25,7 @@ - (BOOL)appendCoreMLExecutionProviderWithOptions:(ORTCoreMLExecutionProviderOpti try { const uint32_t flags = (options.useCPUOnly ? COREML_FLAG_USE_CPU_ONLY : 0) | + (options.useCPUAndGPU ? COREML_FLAG_USE_CPU_AND_GPU : 0) | (options.enableOnSubgraphs ? COREML_FLAG_ENABLE_ON_SUBGRAPH : 0) | (options.onlyEnableForDevicesWithANE ? COREML_FLAG_ONLY_ENABLE_DEVICE_WITH_ANE : 0) | (options.onlyAllowStaticInputShapes ? COREML_FLAG_ONLY_ALLOW_STATIC_INPUT_SHAPES : 0) | diff --git a/onnxruntime/core/providers/coreml/coreml_execution_provider.cc b/onnxruntime/core/providers/coreml/coreml_execution_provider.cc index f2cd4d01174d3..b7d9211e0a9c2 100644 --- a/onnxruntime/core/providers/coreml/coreml_execution_provider.cc +++ b/onnxruntime/core/providers/coreml/coreml_execution_provider.cc @@ -32,6 +32,14 @@ CoreMLExecutionProvider::CoreMLExecutionProvider(uint32_t coreml_flags) LOGS_DEFAULT(ERROR) << "CoreML EP is not supported on this platform."; } + // check if only one flag is set + if ((coreml_flags & COREML_FLAG_USE_CPU_ONLY) && (coreml_flags & COREML_FLAG_USE_CPU_AND_GPU)) { + // multiple device options selected + ORT_THROW( + "Multiple device options selected, you should use at most one of the following options:" + "COREML_FLAG_USE_CPU_ONLY or COREML_FLAG_USE_CPU_AND_GPU or not set"); + } + #if defined(COREML_ENABLE_MLPROGRAM) if (coreml_version_ < MINIMUM_COREML_MLPROGRAM_VERSION && (coreml_flags_ & COREML_FLAG_CREATE_MLPROGRAM) != 0) { diff --git a/onnxruntime/core/providers/coreml/model/model.mm b/onnxruntime/core/providers/coreml/model/model.mm index 5f4eebc7d72ce..ff32c52f942b2 100644 --- 
a/onnxruntime/core/providers/coreml/model/model.mm +++ b/onnxruntime/core/providers/coreml/model/model.mm @@ -395,9 +395,15 @@ Status Predict(const std::unordered_map& inputs, compiled_model_path_ = [compileUrl path]; MLModelConfiguration* config = [[MLModelConfiguration alloc] init]; - config.computeUnits = (coreml_flags_ & COREML_FLAG_USE_CPU_ONLY) - ? MLComputeUnitsCPUOnly - : MLComputeUnitsAll; + + if (coreml_flags_ & COREML_FLAG_USE_CPU_ONLY) { + config.computeUnits = MLComputeUnitsCPUOnly; + } else if (coreml_flags_ & COREML_FLAG_USE_CPU_AND_GPU) { + config.computeUnits = MLComputeUnitsCPUAndGPU; + } else { + config.computeUnits = MLComputeUnitsAll; + } + model_ = [MLModel modelWithContentsOfURL:compileUrl configuration:config error:&error]; if (error != nil || model_ == nil) { diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 63757a6120fa3..7af659851e4f8 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -1213,6 +1213,8 @@ std::unique_ptr CreateExecutionProviderInstance( if (flags_str.find("COREML_FLAG_USE_CPU_ONLY") != std::string::npos) { coreml_flags |= COREMLFlags::COREML_FLAG_USE_CPU_ONLY; + } else if (flags_str.find("COREML_FLAG_USE_CPU_AND_GPU") != std::string::npos) { + coreml_flags |= COREMLFlags::COREML_FLAG_USE_CPU_AND_GPU; } if (flags_str.find("COREML_FLAG_ONLY_ALLOW_STATIC_INPUT_SHAPES") != std::string::npos) { diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 9e1098b24f611..94945c0393d08 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -127,7 +127,7 @@ namespace perftest { "\t [NNAPI only] [NNAPI_FLAG_CPU_ONLY]: Using CPU only in NNAPI EP.\n" "\t [Example] [For NNAPI EP] -e nnapi -i \"NNAPI_FLAG_USE_FP16 NNAPI_FLAG_USE_NCHW NNAPI_FLAG_CPU_DISABLED\"\n" "\n" - "\t [CoreML only] 
[COREML_FLAG_CREATE_MLPROGRAM]: Create an ML Program model instead of Neural Network.\n" + "\t [CoreML only] [COREML_FLAG_CREATE_MLPROGRAM COREML_FLAG_USE_CPU_ONLY COREML_FLAG_USE_CPU_AND_GPU]: Create an ML Program model instead of Neural Network.\n" "\t [Example] [For CoreML EP] -e coreml -i \"COREML_FLAG_CREATE_MLPROGRAM\"\n" "\n" "\t [SNPE only] [runtime]: SNPE runtime, options: 'CPU', 'GPU', 'GPU_FLOAT16', 'DSP', 'AIP_FIXED_TF'. \n" diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index a369c36ae9c43..fcdef48eda56c 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -425,6 +425,12 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); if (key == "COREML_FLAG_CREATE_MLPROGRAM") { coreml_flags |= COREML_FLAG_CREATE_MLPROGRAM; std::cout << "Enabling ML Program.\n"; + } else if (key == "COREML_FLAG_USE_CPU_ONLY") { + coreml_flags |= COREML_FLAG_USE_CPU_ONLY; + std::cout << "CoreML enabled COREML_FLAG_USE_CPU_ONLY.\n"; + } else if (key == "COREML_FLAG_USE_CPU_AND_GPU") { + coreml_flags |= COREML_FLAG_USE_CPU_AND_GPU; + std::cout << "CoreML enabled COREML_FLAG_USE_CPU_AND_GPU.\n"; } else if (key.empty()) { } else { ORT_THROW(