From 590a80cd83e74f93de448d5928663dff3275e6d1 Mon Sep 17 00:00:00 2001
From: wejoncy
Date: Tue, 10 Dec 2024 18:22:27 +0800
Subject: [PATCH] support coreml model cache

---
 .../coreml/coreml_provider_factory.h          |  12 ++
 onnxruntime/core/platform/env.h               |   2 +
 onnxruntime/core/platform/posix/env.cc        |   8 +
 onnxruntime/core/platform/windows/env.cc      |  10 +
 onnxruntime/core/platform/windows/env.h       |   2 +
 .../coreml/builders/model_builder.cc          |  42 +++-
 .../providers/coreml/builders/model_builder.h |   6 +-
 .../core/providers/coreml/coreml_options.cc   |  20 ++
 .../core/providers/coreml/coreml_options.h    |   4 +
 .../core/providers/coreml/model/model.mm      | 198 +++++++++++-------
 onnxruntime/test/perftest/ort_test_session.cc |   4 +-
 11 files changed, 231 insertions(+), 77 deletions(-)

diff --git a/include/onnxruntime/core/providers/coreml/coreml_provider_factory.h b/include/onnxruntime/core/providers/coreml/coreml_provider_factory.h
index d035fd34bd072..12bdcddb5ae2a 100644
--- a/include/onnxruntime/core/providers/coreml/coreml_provider_factory.h
+++ b/include/onnxruntime/core/providers/coreml/coreml_provider_factory.h
@@ -61,6 +61,18 @@ static const char* const kCoremlProviderOption_SpecializationStrategy = "Special
 static const char* const kCoremlProviderOption_ProfileComputePlan = "ProfileComputePlan";
 // please refer to https://developer.apple.com/documentation/coreml/mlmodelconfiguration/allowlowprecisionaccumulationongpu
 static const char* const kCoremlProviderOption_AllowLowPrecisionAccumulationOnGPU = "AllowLowPrecisionAccumulationOnGPU";
+// Specify the directory in which to cache the converted CoreML model.
+// The CoreML EP converts an ONNX subgraph to a CoreML model and saves it to disk. If this path is
+// not specified, the model is saved to a temporary directory and deleted when the session is
+// closed; otherwise the model is saved to the specified path and the user is responsible for deleting it.
+// The basic logic is:
+//   if (ModelCachePath != nullptr && ModelCachePath/cache_coreml.exists()) {
+//     // load from cache_coreml
+//   } else {
+//     // save to ModelCachePath
+//   }
+// The EP does not detect whether a cached model matches the ONNX subgraph, so clear the cache whenever the model changes.
+static const char* const kCoremlProviderOption_ModelCachePath = "ModelCachePath";

 #ifdef __cplusplus
 extern "C" {
diff --git a/onnxruntime/core/platform/env.h b/onnxruntime/core/platform/env.h
index c42b31e64d129..7dbc3fe82db47 100644
--- a/onnxruntime/core/platform/env.h
+++ b/onnxruntime/core/platform/env.h
@@ -197,6 +197,7 @@ class Env {
 #ifdef _WIN32
   /// \brief Returns true if the directory exists.
   virtual bool FolderExists(const std::wstring& path) const = 0;
+  virtual bool FileExists(const std::wstring& path) const = 0;
   /// \brief Recursively creates the directory, if it doesn't exist.
   virtual common::Status CreateFolder(const std::wstring& path) const = 0;
   // Mainly for use with protobuf library
@@ -206,6 +207,7 @@ class Env {
 #endif
   /// \brief Returns true if the directory exists.
   virtual bool FolderExists(const std::string& path) const = 0;
+  virtual bool FileExists(const std::string& path) const = 0;
   /// \brief Recursively creates the directory, if it doesn't exist.
   virtual common::Status CreateFolder(const std::string& path) const = 0;
   // Recursively deletes the directory and its contents.
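For context, a minimal sketch of how an application could opt into the new cache option. This assumes an ORT build whose CoreML EP accepts string-keyed provider options through `AppendExecutionProvider`; the model file and cache directory below are placeholders, not values from this patch.

```cpp
#include <onnxruntime_cxx_api.h>

int main() {
  Ort::Env env{ORT_LOGGING_LEVEL_WARNING, "coreml_cache_demo"};
  Ort::SessionOptions session_options;

  // "ModelCachePath" is the key defined by kCoremlProviderOption_ModelCachePath above.
  // On the first run the EP converts the ONNX subgraph and saves the CoreML model under
  // this directory; later runs load the cached model instead of reconverting.
  session_options.AppendExecutionProvider("CoreML", {{"ModelCachePath", "/tmp/coreml_cache"}});

  Ort::Session session{env, "model.onnx", session_options};
  return 0;
}
```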
diff --git a/onnxruntime/core/platform/posix/env.cc b/onnxruntime/core/platform/posix/env.cc
index 04cf5ff6a3329..94aadf3df4d7e 100644
--- a/onnxruntime/core/platform/posix/env.cc
+++ b/onnxruntime/core/platform/posix/env.cc
@@ -471,6 +471,14 @@ class PosixEnv : public Env {
     return S_ISDIR(sb.st_mode);
   }

+  bool FileExists(const std::string& path) const override {
+    struct stat sb;
+    if (stat(path.c_str(), &sb)) {
+      return false;
+    }
+    return S_ISREG(sb.st_mode);
+  }
+
   common::Status CreateFolder(const std::string& path) const override {
     size_t pos = 0;
     do {
diff --git a/onnxruntime/core/platform/windows/env.cc b/onnxruntime/core/platform/windows/env.cc
index 73319cd9c9b1c..4fccad6dfeb37 100644
--- a/onnxruntime/core/platform/windows/env.cc
+++ b/onnxruntime/core/platform/windows/env.cc
@@ -483,6 +483,16 @@ bool WindowsEnv::FolderExists(const std::string& path) const {
   return (attributes != INVALID_FILE_ATTRIBUTES) && (attributes & FILE_ATTRIBUTE_DIRECTORY);
 }

+bool WindowsEnv::FileExists(const std::wstring& path) const {
+  DWORD attributes = GetFileAttributesW(path.c_str());
+  // Do not test FILE_ATTRIBUTE_NORMAL here: it is only reported when no other attribute is set,
+  // so it would miss most real files (e.g. ones with FILE_ATTRIBUTE_ARCHIVE).
+  return (attributes != INVALID_FILE_ATTRIBUTES) && !(attributes & FILE_ATTRIBUTE_DIRECTORY);
+}
+
+bool WindowsEnv::FileExists(const std::string& path) const {
+  DWORD attributes = GetFileAttributesA(path.c_str());
+  return (attributes != INVALID_FILE_ATTRIBUTES) && !(attributes & FILE_ATTRIBUTE_DIRECTORY);
+}
+
 common::Status WindowsEnv::CreateFolder(const std::wstring& path) const {
   size_t pos = 0;
   do {
diff --git a/onnxruntime/core/platform/windows/env.h b/onnxruntime/core/platform/windows/env.h
index 395aface1d809..05b92bb6a21eb 100644
--- a/onnxruntime/core/platform/windows/env.h
+++ b/onnxruntime/core/platform/windows/env.h
@@ -68,6 +68,8 @@ class WindowsEnv : public Env {
                                    MappedMemoryPtr& mapped_memory) const override;
   bool FolderExists(const std::wstring& path) const override;
   bool FolderExists(const std::string& path) const override;
+  bool FileExists(const std::wstring& path) const override;
+  bool FileExists(const std::string& path) const override;
   common::Status CreateFolder(const std::wstring& path) const override;
   common::Status CreateFolder(const std::string& path) const override;
   common::Status DeleteFolder(const PathString& path) const override;
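These new `Env` helpers exist because the two cache layouts look different on disk: an ML Program cache is a folder (mlpackage) while a NeuralNetwork cache is a single file. A small illustrative sketch of how a caller can combine them (the helper name `CachedModelExists` is hypothetical, not part of this patch):

```cpp
#include <string>
#include "core/platform/env.h"

// A cached ML Program is a directory; a cached NeuralNetwork model is a plain
// file. Checking both covers either model format.
bool CachedModelExists(const std::string& cache_path) {
  auto& env = onnxruntime::Env::Default();
  return env.FolderExists(cache_path) || env.FileExists(cache_path);
}
```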
diff --git a/onnxruntime/core/providers/coreml/builders/model_builder.cc b/onnxruntime/core/providers/coreml/builders/model_builder.cc
index 6486942199df7..8c98ee0c4097e 100644
--- a/onnxruntime/core/providers/coreml/builders/model_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/model_builder.cc
@@ -410,10 +410,37 @@ ModelBuilder::ModelBuilder(const GraphViewer& graph_viewer, const logging::Logge
       coreml_version_(coreml_version),
       coreml_options_(coreml_options),
       create_ml_program_(coreml_options.CreateMLProgram()),
-      model_output_path_(GetModelOutputPath(create_ml_program_)),
       onnx_input_names_(std::move(onnx_input_names)),
       onnx_output_names_(std::move(onnx_output_names)),
       coreml_model_(std::make_unique<CoreML::Specification::Model>()) {
+  if (coreml_options.ModelCachePath().empty()) {
+    model_output_path_ = GetModelOutputPath(create_ml_program_);
+  } else {
+    // Input names in ONNX are unique, so they can serve as the cache key.
+    std::string inputs_collections = std::accumulate(
+        onnx_input_names_.begin(), onnx_input_names_.end(), std::string(),
+        [](const std::string& a, const std::string& b) { return a + "," + b; });
+    std::hash<std::string> hasher;
+    // Each subgraph gets its own cache folder, so hash the concatenated input names.
+    model_output_path_ = std::string(coreml_options.ModelCachePath()) +
+                         "/" + std::to_string(hasher(inputs_collections));
+    if (!coreml_options_.CreateMLProgram()) {
+      ORT_THROW_IF_ERROR(Env::Default().CreateFolder(model_output_path_));
+      model_output_path_ += "/mlmodel";
+    }
+  }
+
+  // GetModelOutputPath(create_ml_program_) always produces a unique path that does not yet exist,
+  // so an existing path must be a cached model. An ML Program is stored as a folder while a
+  // NeuralNetwork model is stored as a file, hence both checks.
+  if (Env::Default().FolderExists(ToPathString(model_output_path_)) ||
+      Env::Default().FileExists(ToPathString(model_output_path_))) {
+    is_model_cached_ = true;
+    LOGS(logger, WARNING) << "Model is already cached in " << model_output_path_
+                          << " and will be reused. If you want to update the model or run into other issues, "
+                          << "clear the cache and retry.";
+    return;
+  }
+
   if (create_ml_program_) {
 #if defined(COREML_ENABLE_MLPROGRAM)
     coreml_model_->set_specificationversion(CoreMLSpecVersion());
@@ -847,6 +874,10 @@ Status ModelBuilder::RegisterModelInputOutput(const NodeArg& node_arg, bool is_i

   input_output_info_.emplace(name, OnnxTensorInfo{data_type, shape});

+  if (is_model_cached_) {
+    return Status::OK();
+  }
+
 #if defined(COREML_ENABLE_MLPROGRAM)
   if (create_ml_program_) {
     if (is_input) {
@@ -1056,8 +1087,13 @@ Status ModelBuilder::Build(const GraphViewer& graph_viewer, const logging::Logge
   ModelBuilder builder(graph_viewer, logger, coreml_version, coreml_options,
                        std::move(onnx_input_names), std::move(onnx_output_names));

-  ORT_RETURN_IF_ERROR(builder.CreateModel());
-  ORT_RETURN_IF_ERROR(builder.SaveModel());
+  if (!builder.IsModelCached()) {
+    ORT_RETURN_IF_ERROR(builder.CreateModel());
+    ORT_RETURN_IF_ERROR(builder.SaveModel());
+  } else {
+    ORT_RETURN_IF_ERROR(builder.RegisterModelInputs());
+    ORT_RETURN_IF_ERROR(builder.RegisterModelOutputs());
+  }

   return builder.LoadModel(model);
 }
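The cache key in the constructor above is simply a hash of the comma-joined input names. A standalone sketch of the same derivation (the function name is hypothetical; note that `std::hash` is implementation-defined, so the key is stable within one build but not guaranteed to be identical across platforms or standard-library versions):

```cpp
#include <functional>
#include <numeric>
#include <string>
#include <vector>

// Mirrors the ModelBuilder constructor: join the (unique) ONNX input names with
// commas, then hash the result to get a per-subgraph cache folder name.
std::string SubgraphCacheKey(const std::vector<std::string>& onnx_input_names) {
  std::string joined = std::accumulate(
      onnx_input_names.begin(), onnx_input_names.end(), std::string(),
      [](const std::string& a, const std::string& b) { return a + "," + b; });
  return std::to_string(std::hash<std::string>{}(joined));
}
```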
diff --git a/onnxruntime/core/providers/coreml/builders/model_builder.h b/onnxruntime/core/providers/coreml/builders/model_builder.h
index e19597cf0dc2e..28c7dc42da581 100644
--- a/onnxruntime/core/providers/coreml/builders/model_builder.h
+++ b/onnxruntime/core/providers/coreml/builders/model_builder.h
@@ -54,6 +54,7 @@ class ModelBuilder {
   // We only support CoreML 3 and later so the spec version is always version + 1.
   int32_t CoreMLVersion() const { return coreml_version_; }
   int32_t CoreMLSpecVersion() const { return coreml_version_ + 1; }
+  bool IsModelCached() const { return is_model_cached_; }

   // Returns true if we are creating an ML Program
   bool CreateMLProgram() const {
@@ -218,8 +219,9 @@ class ModelBuilder {
   const logging::Logger& logger_;
   const int32_t coreml_version_;
   CoreMLOptions coreml_options_;
-  const bool create_ml_program_;         // ML Program (CoreML5, iOS 15+, macOS 12+) or NeuralNetwork (old)
-  const std::string model_output_path_;  // create_ml_program_ ? dir for mlpackage : filename for mlmodel
+  const bool create_ml_program_;   // ML Program (CoreML5, iOS 15+, macOS 12+) or NeuralNetwork (old)
+  std::string model_output_path_;  // create_ml_program_ ? dir for mlpackage : filename for mlmodel
+  bool is_model_cached_{false};

   std::vector<std::string> onnx_input_names_;
   std::vector<std::string> onnx_output_names_;
diff --git a/onnxruntime/core/providers/coreml/coreml_options.cc b/onnxruntime/core/providers/coreml/coreml_options.cc
index 4ec780208e528..5babd7633cd88 100644
--- a/onnxruntime/core/providers/coreml/coreml_options.cc
+++ b/onnxruntime/core/providers/coreml/coreml_options.cc
@@ -5,6 +5,7 @@
 #include "core/providers/coreml/coreml_provider_factory.h"  // defines flags
 #include "core/providers/coreml/model/host_utils.h"
 #include "core/providers/coreml/builders/helper.h"
+#include "core/platform/env.h"

 namespace onnxruntime {

@@ -71,6 +72,7 @@ void CoreMLOptions::ValidateAndParseProviderOption(const ProviderOptions& option
       kCoremlProviderOption_SpecializationStrategy,
       kCoremlProviderOption_ProfileComputePlan,
       kCoremlProviderOption_AllowLowPrecisionAccumulationOnGPU,
+      kCoremlProviderOption_ModelCachePath,
   };
   // Validate the options
   for (const auto& option : options) {
@@ -103,7 +105,25 @@ void CoreMLOptions::ValidateAndParseProviderOption(const ProviderOptions& option
       profile_compute_plan_ = option.second == "1";
     } else if (kCoremlProviderOption_AllowLowPrecisionAccumulationOnGPU == option.first) {
       allow_low_precision_accumulation_on_gpu_ = option.second == "1";
+    } else if (kCoremlProviderOption_ModelCachePath == option.first) {
+      model_cache_path_ = option.second;
     }
   }

+  // Qualify the cache path with RequireStaticShape and ModelFormat so that models converted under different options do not collide.
+  if (model_cache_path_.size()) {
+    if (require_static_shape_) {
+      model_cache_path_ += "/static_shape";
+    } else {
+      model_cache_path_ += "/dynamic_shape";
+    }
+
+    if (create_mlprogram_) {
+      model_cache_path_ += "/mlpackage";
+    } else {
+      model_cache_path_ += "/mlnnmodel";
+    }
+    ORT_THROW_IF_ERROR(Env::Default().CreateFolder(model_cache_path_));
+  }
 }
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/coreml/coreml_options.h b/onnxruntime/core/providers/coreml/coreml_options.h
index fd05c96927bd1..1ec4294492552 100644
--- a/onnxruntime/core/providers/coreml/coreml_options.h
+++ b/onnxruntime/core/providers/coreml/coreml_options.h
@@ -17,6 +17,8 @@ class CoreMLOptions {
   std::string strategy_;
   bool profile_compute_plan_{false};
   bool allow_low_precision_accumulation_on_gpu_{false};
+  // Directory in which to store the converted CoreML model; empty means no caching.
+  std::string model_cache_path_;

  public:
   explicit CoreMLOptions(uint32_t coreml_flags);
@@ -32,6 +34,8 @@ class CoreMLOptions {
   bool UseStrategy(std::string_view strategy) const { return strategy_ == strategy; }
   bool ProfileComputePlan() const { return profile_compute_plan_ && create_mlprogram_; }

+  std::string_view ModelCachePath() const { return model_cache_path_; }
+
 private:
  void ValidateAndParseProviderOption(const ProviderOptions& options);
 };
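Putting the two pieces together, the on-disk location of a cached model is the user-supplied root qualified by the shape mode and model format, plus the per-subgraph hash from the ModelBuilder constructor. A sketch of the composition (the helper name and example path are illustrative):

```cpp
#include <string>

// Mirrors CoreMLOptions plus ModelBuilder: root -> shape mode -> model format -> subgraph hash.
std::string ComposeCacheDir(const std::string& root, bool require_static_shape,
                            bool create_mlprogram, const std::string& subgraph_hash) {
  std::string path = root;
  path += require_static_shape ? "/static_shape" : "/dynamic_shape";
  path += create_mlprogram ? "/mlpackage" : "/mlnnmodel";
  // e.g. "/tmp/coreml_cache/static_shape/mlpackage/1234567890"
  return path + "/" + subgraph_hash;
}
```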
diff --git a/onnxruntime/core/providers/coreml/model/model.mm b/onnxruntime/core/providers/coreml/model/model.mm
index 755dbfbd6e68c..23de66073c928 100644
--- a/onnxruntime/core/providers/coreml/model/model.mm
+++ b/onnxruntime/core/providers/coreml/model/model.mm
@@ -301,53 +301,116 @@ Status GetMLMultiArrayCopyInfo(const MLMultiArray* _Nonnull array,
   return Status::OK();
 }

-// since __clang_major__ >= 15, MLComputePlan is introduced in <CoreML/CoreML.h>
-// We are actually ensure the MacOS/IOS version and Xcode version is greater than `macOS 14.4, iOS 17.4`.
-// The macro API_AVAILABLE should also be fine.
+// MLComputePlan is available since macOS 14.4 / iOS 17.4; the API_AVAILABLE attribute below enforces this.
 // Otherwise, the compiler will complain `MLComputePlan` is not defined.
 // we define __clang_analyzer__ here is for bypass static analysis
+API_AVAILABLE(macos(14.4), ios(17.4), tvos(17.4), watchos(10.4))
+void ProfileBlock(MLComputePlan* _Nullable computePlan, MLModelStructureProgramBlock* block) {
+  for (MLModelStructureProgramOperation* operation in block.operations) {
+    for (size_t i = 0; i < operation.blocks.count; ++i) {
+      ProfileBlock(computePlan, operation.blocks[i]);
+    }
+    // Get the compute device usage for the operation.
+    MLComputePlanDeviceUsage* computeDeviceUsage = [computePlan computeDeviceUsageForMLProgramOperation:operation];
+    id<MLComputeDeviceProtocol> preferredDevice = computeDeviceUsage.preferredComputeDevice;
+    // Get the estimated cost of executing the operation.
+    MLComputePlanCost* estimatedCost = [computePlan estimatedCostOfMLProgramOperation:operation];
+    if (![operation.operatorName isEqualToString:@"const"]) {
+      NSLog(@"Operation: %@, Device Usage: %@, Estimated Cost: %f", operation.operatorName, preferredDevice, estimatedCost.weight);
+    }
+  }
+}
+
+// Same availability requirements as ProfileBlock above.
+API_AVAILABLE(macos(14.4), ios(17.4), tvos(17.4), watchos(10.4))
 void ProfileComputePlan(NSURL* compileUrl, MLModelConfiguration* config) {
-#if defined(__APPLE__) && defined(__clang__) && __clang_major__ >= 15 && !defined(__clang_analyzer__)
-  if (@available(macOS 14.4, iOS 17.4, *)) {
-    [MLComputePlan loadContentsOfURL:compileUrl
-                       configuration:config
-                   completionHandler:^(MLComputePlan* _Nullable computePlan, NSError* _Nullable error) {
-                     if (!computePlan) {
-                       NSLog(@"Error loading compute plan: %@", error);
-                       // Handle error.
-                       return;
-                     }
-                     MLModelStructureProgram* program = computePlan.modelStructure.program;
-                     if (!program) {
-                       NSLog(@"Error loading program from compute plan., this is not a mlprogram model");
-                       return;
-                     }
-
-                     MLModelStructureProgramFunction* mainFunction = program.functions[@"main"];
-                     if (!mainFunction) {
-                       NSLog(@"Error loading main function from program");
-                       return;
-                     }
-
-                     NSArray<MLModelStructureProgramOperation*>* operations = mainFunction.block.operations;
-                     NSLog(@"Number of operations, 'const' node is included. : %lu", operations.count);
-                     for (MLModelStructureProgramOperation* operation in operations) {
-                       // Get the compute device usage for the operation.
-                       MLComputePlanDeviceUsage* computeDeviceUsage = [computePlan computeDeviceUsageForMLProgramOperation:operation];
-                       id<MLComputeDeviceProtocol> preferredDevice = computeDeviceUsage.preferredComputeDevice;
-                       // Get the estimated cost of executing the operation.
-                       MLComputePlanCost* estimatedCost = [computePlan estimatedCostOfMLProgramOperation:operation];
-                       if (![operation.operatorName isEqualToString:@"const"]) {
-                         NSLog(@"Operation: %@, Device Usage: %@, Estimated Cost: %f", operation.operatorName, preferredDevice, estimatedCost.weight);
-                       }
-                     }
+#if !defined(__clang_analyzer__)
+  dispatch_semaphore_t fd_sema = dispatch_semaphore_create(0);
+  [MLComputePlan loadContentsOfURL:compileUrl
+                     configuration:config
+                 completionHandler:^(MLComputePlan* _Nullable computePlan, NSError* _Nullable error) {
+                   if (!computePlan) {
+                     NSLog(@"Error loading compute plan: %@", error);
+                     // Signal before the early return so the wait below does not hit the timeout.
+                     dispatch_semaphore_signal(fd_sema);
+                     return;
+                   }
+                   MLModelStructureProgram* program = computePlan.modelStructure.program;
+                   if (!program) {
+                     NSLog(@"Error loading program from compute plan; this is not an mlprogram model");
+                     dispatch_semaphore_signal(fd_sema);
+                     return;
+                   }
+
+                   [computePlan.modelStructure.program.functions enumerateKeysAndObjectsUsingBlock:^(NSString* function_name,
+                                                                                                     MLModelStructureProgramFunction* function,
+                                                                                                     BOOL* _Nonnull __unused stop) {
+                     NSLog(@"profile function : %@", function_name);
+                     ProfileBlock(computePlan, function.block);
+                     dispatch_semaphore_signal(fd_sema);
                    }];
+                 }];
+  long status = dispatch_semaphore_wait(fd_sema, dispatch_time(DISPATCH_TIME_NOW, (int64_t)(5 * 60 * NSEC_PER_SEC)));
+  if (status != 0) {
+    NSLog(@"profile function : timeout");
+  }
+#endif
+}
+
+API_AVAILABLE(macos(15.0), ios(18.0), tvos(18.0))
+void ConfigureOptimizationHints(MLModelConfiguration* config, const CoreMLOptions& coreml_options) {
+#if !defined(__clang_analyzer__)
+  MLOptimizationHints* optimizationHints = [[MLOptimizationHints alloc] init];
+  if (coreml_options.UseStrategy("FastPrediction")) {
+    optimizationHints.specializationStrategy = MLSpecializationStrategyFastPrediction;
+    config.optimizationHints = optimizationHints;
+  } else if (coreml_options.UseStrategy("Default")) {
+    optimizationHints.specializationStrategy = MLSpecializationStrategyDefault;
+    config.optimizationHints = optimizationHints;
   } else {
-    NSLog(@"iOS 17.4+/macOS 14.4+ or later is required to use the compute plan API");
+    // not set
   }
 #endif
 }

+Status CompileOrReadCachedModel(NSURL* modelUrl, const CoreMLOptions& coreml_options,
+                                NSMutableString* compiled_model_path) {
+  NSURL* cached_model_base_url = modelUrl;
+  if (!coreml_options.CreateMLProgram()) {
+    cached_model_base_url = [cached_model_base_url URLByDeletingLastPathComponent];
+  }
+  NSURL* cached_model_url = [cached_model_base_url URLByAppendingPathComponent:@"compiled_model.mlmodelc"];
+
+  // If a cached compiled model already exists, return its path directly.
+  NSError* error = nil;
+  if ([[NSFileManager defaultManager] fileExistsAtPath:[cached_model_url path]]) {
+    [compiled_model_path appendString:[cached_model_url path]];
+    return Status::OK();
+  }
+
+  // TODO: Update this to the version with a completion handler, as the API used here is deprecated.
+  // https://developer.apple.com/documentation/coreml/mlmodel/3929553-compilemodelaturl
+  // As we call loadModel during EP Compile there shouldn't be an issue letting the actual compile run in the
+  // background. We will have to check for completion in `predict` and block until it is done.
+  NSURL* compiled_model_url = [MLModel compileModelAtURL:modelUrl error:&error];
+  if (error != nil) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Error compiling model: ",
+                           [[error localizedDescription] UTF8String]);
+  }
+
+  if (coreml_options.ModelCachePath().empty()) {
+    [compiled_model_path appendString:[compiled_model_url path]];
+    return Status::OK();
+  }
+
+  // Move the compiled model into the cache since the user has set a cache path.
+  if (![[NSFileManager defaultManager] moveItemAtURL:compiled_model_url toURL:cached_model_url error:&error]) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Error moving compiled model to cache path: ",
+                           [[cached_model_url path] UTF8String], ", reason: ", [[error localizedDescription] UTF8String]);
+  }
+  [compiled_model_path appendString:[cached_model_url path]];
+  return Status::OK();
+}
+
 // Internal Execution class
 // This class is part of the model class and handles the calls into CoreML. Specifically, it performs
 // 1. Compile the model by given path for execution
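The rewritten ProfileComputePlan above blocks the calling thread on a dispatch semaphore until the asynchronous completion handler finishes, with a five-minute timeout. For readers more at home in C++, a rough analogue of that wait-with-timeout pattern (illustrative only, not code from this patch):

```cpp
#include <chrono>
#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <thread>

int main() {
  std::mutex m;
  std::condition_variable cv;
  bool done = false;

  // Stand-in for the asynchronous CoreML completion handler.
  std::thread worker([&] {
    // ... profile the compute plan ...
    std::lock_guard<std::mutex> lock(m);
    done = true;
    cv.notify_one();  // plays the role of dispatch_semaphore_signal
  });

  // Equivalent of dispatch_semaphore_wait with a 5-minute timeout.
  std::unique_lock<std::mutex> lock(m);
  if (!cv.wait_for(lock, std::chrono::minutes(5), [&] { return done; })) {
    std::puts("profile function : timeout");
  }
  lock.unlock();
  worker.join();
  return 0;
}
```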
@@ -366,7 +429,7 @@ Status Predict(const std::unordered_map<std::string, OnnxTensorData>& inputs,
  private:
   void cleanup();
   NSString* coreml_model_path_{nil};
-  NSString* compiled_model_path_{nil};
+  NSURL* compiled_model_url_{nil};
   const logging::Logger& logger_;
   CoreMLOptions coreml_options_;
   MLModel* model_{nil};
@@ -387,14 +450,18 @@ Status Predict(const std::unordered_map<std::string, OnnxTensorData>& inputs,
 }

 void Execution::cleanup() {
+  // Keep the compiled model on disk if the user has set a cache path.
+  if (coreml_options_.ModelCachePath().size()) {
+    return;
+  }
+
+  NSString* compiled_model_path = [compiled_model_url_ path];
   NSError* error = nil;
-  if (compiled_model_path_ != nil) {
-    [[NSFileManager defaultManager] removeItemAtPath:compiled_model_path_ error:&error];
+  if (compiled_model_path != nil) {
+    [[NSFileManager defaultManager] removeItemAtPath:compiled_model_path error:&error];
     if (error != nil) {
-      LOGS(logger_, ERROR) << "Failed cleaning up the compiled model: " << [compiled_model_path_ UTF8String]
+      LOGS(logger_, ERROR) << "Failed cleaning up the compiled model: " << [compiled_model_path UTF8String]
                            << ", error message: " << [[error localizedDescription] UTF8String];
     }
-    compiled_model_path_ = nil;
   }

 #if !defined(NDEBUG)
@@ -430,17 +497,10 @@ Status Predict(const std::unordered_map<std::string, OnnxTensorData>& inputs,
     return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to create model URL from path");
   }

-  // TODO: Update this to version with callback handler as the API used here is deprecated.
-  // https://developer.apple.com/documentation/coreml/mlmodel/3929553-compilemodelaturl
-  // As we call loadModel during EP Compile there shouldn't be an issue letting the actual compile run in the
-  // background. We will have to check for completion in `predict` and block until it is done.
-  NSURL* compileUrl = [MLModel compileModelAtURL:modelUrl error:&error];
-  if (error != nil) {
-    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Error compiling model: ",
-                           [[error localizedDescription] UTF8String]);
-  }
-
-  compiled_model_path_ = [compileUrl path];
+  NSMutableString* compiled_model_path = [[NSMutableString alloc] init];
+  ORT_RETURN_IF_ERROR(CompileOrReadCachedModel(
+      [NSURL fileURLWithPath:coreml_model_path_], coreml_options_, compiled_model_path));
+  compiled_model_url_ = [NSURL fileURLWithPath:compiled_model_path];

   MLModelConfiguration* config = [[MLModelConfiguration alloc] init];
   uint32_t coreml_compute_unit = coreml_options_.ComputeUnits();
@@ -458,27 +518,23 @@ Status Predict(const std::unordered_map<std::string, OnnxTensorData>& inputs,
     config.allowLowPrecisionAccumulationOnGPU = YES;
   }

-// Set the specialization strategy to FastPrediction for macOS 10.15+
-// since __clang_major__ >= 15, optimizationHints is introduced in <CoreML/CoreML.h>
-// Same as above comments for why we are checking __clang_major__.
-// we define __clang_analyzer__ here is for bypass static analysis
-#if defined(__APPLE__) && defined(__clang__) && __clang_major__ >= 15 && !defined(__clang_analyzer__)
-  if (HAS_COREML8_OR_LATER) {
-    MLOptimizationHints* optimizationHints = [[MLOptimizationHints alloc] init];
-    if (coreml_options_.UseStrategy("FastPrediction")) {
-      optimizationHints.specializationStrategy = MLSpecializationStrategyFastPrediction;
-      config.optimizationHints = optimizationHints;
-    } else if (coreml_options_.UseStrategy("Default")) {
-      optimizationHints.specializationStrategy = MLSpecializationStrategyDefault;
-      config.optimizationHints = optimizationHints;
-    }
+  // Set the specialization strategy via optimization hints where available.
+  // ConfigureOptimizationHints handles the __clang_analyzer__ bypass internally.
+  if (@available(macOS 15.0, iOS 18.0, *)) {
+    ConfigureOptimizationHints(config, coreml_options_);
+  } else {
+    LOGS(logger_, WARNING) << "macOS 15.0+/iOS 18.0+ is required to set optimization hints";
   }
-#endif
+
   if (coreml_options_.ProfileComputePlan()) {
-    ProfileComputePlan(compileUrl, config);
+    if (@available(macOS 14.4, iOS 17.4, *)) {
+      ProfileComputePlan(compiled_model_url_, config);
+    } else {
+      LOGS(logger_, WARNING) << "iOS 17.4+/macOS 14.4+ or later is required to use the compute plan API";
+    }
   }

-  model_ = [MLModel modelWithContentsOfURL:compileUrl configuration:config error:&error];
+  model_ = [MLModel modelWithContentsOfURL:compiled_model_url_ configuration:config error:&error];

   if (error != nil || model_ == nil) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to create MLModel",
diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc
index a96028ed3903e..0a09595d67252 100644
--- a/onnxruntime/test/perftest/ort_test_session.cc
+++ b/onnxruntime/test/perftest/ort_test_session.cc
@@ -349,7 +349,8 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
         kCoremlProviderOption_EnableOnSubgraphs,
         kCoremlProviderOption_SpecializationStrategy,
         kCoremlProviderOption_ProfileComputePlan,
-        kCoremlProviderOption_AllowLowPrecisionAccumulationOnGPU};
+        kCoremlProviderOption_AllowLowPrecisionAccumulationOnGPU,
+        kCoremlProviderOption_ModelCachePath};
     ParseSessionConfigs(ov_string, provider_options, available_keys);

     std::unordered_map<std::string, std::string> available_options = {
@@ -373,6 +374,7 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
                  (provider_option.second == "0" || provider_option.second == "1")) {
       } else if (provider_option.first == kCoremlProviderOption_AllowLowPrecisionAccumulationOnGPU &&
                  (provider_option.second == "0" || provider_option.second == "1")) {
+      } else if (provider_option.first == kCoremlProviderOption_ModelCachePath) {
       } else {
         ORT_THROW("Invalid value for option ", provider_option.first, ": ", provider_option.second);
       }
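Finally, since the header comment states that the EP never validates a cached model against the current ONNX subgraph, cache invalidation is the application's job. A minimal sketch of clearing the cache root whenever the source model changes (the helper name and path handling are illustrative, not part of this patch):

```cpp
#include <filesystem>

// Remove the entire cache root; the EP reconverts the model and repopulates the
// cache on the next session creation.
void ClearCoreMLCache(const std::filesystem::path& cache_root) {
  std::error_code ec;
  std::filesystem::remove_all(cache_root, ec);  // tolerate a missing directory
}
```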