diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc
index 4fca4037301fb..0ffde116f4efc 100644
--- a/onnxruntime/core/providers/openvino/backend_manager.cc
+++ b/onnxruntime/core/providers/openvino/backend_manager.cc
@@ -25,6 +25,16 @@ GlobalContext& BackendManager::GetGlobalContext() {
   return global_context_;
 }
 
+// Returns a reference to the ov::CompiledModel held by the concrete backend.
+// Throws (ORT_THROW) when no concrete backend is present, rather than dereferencing a null
+// pointer and invoking undefined behaviour.
+ov::CompiledModel& BackendManager::GetOVCompiledModel() {
+  if (!concrete_backend_) {
+    ORT_THROW("[OpenVINO EP] Compiled model requested before a backend was created");
+  }
+  return concrete_backend_->GetOVCompiledModel();
+}
+
 BackendManager::BackendManager(const GlobalContext& global_context,
                                const onnxruntime::Node& fused_node,
                                const onnxruntime::GraphViewer& subgraph,
@@ -35,7 +45,7 @@ BackendManager::BackendManager(const GlobalContext& global_context,
   openvino_sdk_version_ = std::to_string(global_context_.OpenVINO_Version.at(0)) + "." +
                           std::to_string(global_context_.OpenVINO_Version.at(1));
   if (ep_ctx_handle_.CheckForOVEPCtxNode(subgraph, openvino_sdk_version_)) {
-    if (ep_ctx_handle_.ImportBlobFromEPCtxModel(subgraph) != Status::OK())
+    if (ep_ctx_handle_.ImportBlobFromEPCtxModel(subgraph, global_context_.ep_context_embed_mode) != Status::OK())
       ORT_THROW("Import blob from model failed");
   }
 
diff --git a/onnxruntime/core/providers/openvino/backend_manager.h b/onnxruntime/core/providers/openvino/backend_manager.h
index b9ff7a72372b3..5ec462afd9d01 100644
--- a/onnxruntime/core/providers/openvino/backend_manager.h
+++ b/onnxruntime/core/providers/openvino/backend_manager.h
@@ -30,6 +30,7 @@ class BackendManager {
   GlobalContext& GetGlobalContext();
   Status ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphViewer& subgraph,
                                        const logging::Logger& logger);
+  ov::CompiledModel& GetOVCompiledModel();
 
  private:
   std::unique_ptr GetModelProtoFromFusedNode(
diff --git a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc
index ee9486a62ea37..b0a7f46521cce 100644
--- 
a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc
+++ b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc
@@ -94,11 +94,14 @@ Status EPCtxHandler::ExportEPCtxModel(const GraphViewer& graph_viewer,
   return Status::OK();
 }
 
-Status EPCtxHandler::ImportBlobFromEPCtxModel(const GraphViewer& graph_viewer) {
+Status EPCtxHandler::ImportBlobFromEPCtxModel(const GraphViewer& graph_viewer, bool& ep_context_embed_mode) {
   auto node = graph_viewer.GetNode(0);
   auto& attrs = node->GetAttributes();
   ORT_ENFORCE(attrs.count(EP_CACHE_CONTEXT) > 0);
   model_stream_ = std::make_shared(attrs.at(EP_CACHE_CONTEXT).s());
+  // The embed-mode attribute is not guarded like EP_CACHE_CONTEXT above, so a node without it would
+  // make attrs.at() throw. NOTE(review): absent is treated as embedded (EPContext schema default 1).
+  ep_context_embed_mode = (attrs.count(EMBED_MODE) == 0) || (attrs.at(EMBED_MODE).i() != 0);
   LOGS_DEFAULT(VERBOSE) << "[OpenVINO EP] Read blob from EPContext Node";
 
   is_valid_ep_ctx_graph_ = true;
diff --git a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h
index c631d011d02b1..c7ee943dff761 100644
--- a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h
+++ b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h
@@ -30,7 +30,7 @@ class EPCtxHandler {
                           const bool& ep_context_embed_mode,
                           std::string&& model_blob_str,
                           const std::string& openvino_sdk_version) const;
-  Status ImportBlobFromEPCtxModel(const GraphViewer& graph_viewer);
+  Status ImportBlobFromEPCtxModel(const GraphViewer& graph_viewer, bool& ep_context_embed_mode);
   bool CheckForOVEPCtxNode(const GraphViewer& graph_viewer, std::string openvino_sdk_version) const;
   bool IsValidOVEPCtxGraph() const { return is_valid_ep_ctx_graph_; }
   [[nodiscard]] const std::shared_ptr GetModelBlobStream() const { return model_stream_; }
diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
index 19a634818a442..6e39f5832226c 100644
--- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
+++ 
b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
@@ -2,13 +2,14 @@
 // Licensed under the MIT License
 #include
 #include
-
+#include
 #include "core/providers/shared_library/provider_api.h"
 #include "core/providers/openvino/openvino_execution_provider.h"
 #include "core/providers/openvino/contexts.h"
 #include "core/providers/openvino/backend_manager.h"
 #include "core/providers/openvino/onnx_ctx_model_helper.h"
 #include "core/providers/openvino/ov_versions/capability.h"
+#include "core/session/onnxruntime_session_options_config_keys.h"
 #include "openvino/core/version.hpp"
 #ifdef USE_OVEP_NPU_MEMORY
 #include "core/providers/openvino/ov_allocator.h"
@@ -150,7 +151,7 @@ common::Status OpenVINOExecutionProvider::Compile(
                                                            graph_body_viewer,
                                                            *GetLogger(),
                                                            ep_ctx_handle_);
-
+    backend_manager_ = backend_manager;
     compute_info.create_state_func =
         [backend_manager](ComputeContext* context, FunctionState* state) {
           OpenVINOEPFunctionState* p = new OpenVINOEPFunctionState();
@@ -198,4 +199,51 @@ std::vector OpenVINOExecutionProvider::CreatePreferredAllocators()
 }
 #endif
 
+// Applies session-time dynamic options to the compiled OpenVINO model.
+// Only kOrtEpDynamicOptionsWorkloadType is acted on ("Efficient" or "Default"); any other key
+// or value is logged as a warning and skipped. Returns INVALID_ARGUMENT when keys and values
+// differ in length, and FAIL when a workload type is requested while no backend manager has
+// been stored by Compile().
+// NOTE(review): backend_manager_ holds whatever Compile() assigned last, so if more than one
+// subgraph is compiled only that one receives the property - confirm intent.
+common::Status OpenVINOExecutionProvider::SetEpDynamicOptions(gsl::span keys,
+                                                              gsl::span values) {
+  // Ensure the number of keys and values match
+  if (keys.size() != values.size()) {
+    return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Mismatched keys and values sizes.");
+  }
+
+  for (size_t i = 0; i < keys.size(); ++i) {
+    std::string key = keys[i];
+    std::string value = values[i];
+
+    if (key == kOrtEpDynamicOptionsWorkloadType) {
+      // Declared per entry so a previously accepted value is never re-applied when a later
+      // entry carries an unsupported one.
+      std::string workload_type;
+      if (value == "Efficient") {
+        workload_type = "EFFICIENT";
+      } else if (value == "Default") {
+        workload_type = "DEFAULT";
+      } else {
+        LOGS_DEFAULT(WARNING) << "Unknown workload_type - ignoring " << key << "/" << value;
+        LOGS_DEFAULT(WARNING) << "Supported types are 'Efficient' and 'Default' \n";
+      }
+      if (!workload_type.empty()) {
+        // backend_manager_ is only assigned inside Compile(); avoid dereferencing it earlier.
+        if (!backend_manager_) {
+          return Status(common::ONNXRUNTIME, common::FAIL,
+                        "SetEpDynamicOptions called before the OpenVINO backend was created.");
+        }
+        LOGS_DEFAULT(INFO) << "SetEpDynamicOptions - modifying: " << key << "/" << value;
+        ov::CompiledModel& ov_compiled_model = backend_manager_->GetOVCompiledModel();
+        ov_compiled_model.set_property(ov::workload_type(workload_type));
+      }
+    } else {
+      // Handle unknown options
+      LOGS_DEFAULT(WARNING) << "Unknown key/value pair - ignoring " << key << "/" << value;
+    }
+  }
+  return Status::OK();
+}
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.h b/onnxruntime/core/providers/openvino/openvino_execution_provider.h
index 7d9da65ea7e07..16f06ad9dd1da 100644
--- a/onnxruntime/core/providers/openvino/openvino_execution_provider.h
+++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.h
@@ -188,6 +188,9 @@ class OpenVINOExecutionProvider : public IExecutionProvider {
   Status Compile(const std::vector& fused_nodes,
                  std::vector& node_compute_funcs) override;
 
+  Status SetEpDynamicOptions(gsl::span /*keys*/,
+                             gsl::span /*values*/) override;
+
   const void* GetExecutionHandle() const noexcept override {
     return nullptr;
   }
@@ -197,6 +200,7 @@ class OpenVINOExecutionProvider : public IExecutionProvider {
  private:
   std::unique_ptr global_context_;
   openvino_ep::EPCtxHandler ep_ctx_handle_{};
+  std::shared_ptr backend_manager_;
 };
 
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
index b46106db3c232..57c4e92685c96 100644
--- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
+++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
@@ -7,6 +7,7 @@
 #include "core/providers/openvino/openvino_provider_factory.h"
 #include "core/providers/openvino/openvino_execution_provider.h"
 #include "core/providers/openvino/openvino_provider_factory_creator.h"
+#include "core/session/onnxruntime_session_options_config_keys.h"
 #include "nlohmann/json.hpp"
 
 namespace onnxruntime {
@@ -50,10 +51,10 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
 };
std::unique_ptr OpenVINOProviderFactory::CreateProvider() { - bool so_disable_cpu_fallback = config_options_.GetConfigOrDefault("session.disable_cpu_ep_fallback", "0") == "1"; - bool so_export_ep_ctx_blob = config_options_.GetConfigOrDefault("ep.context_enable", "0") == "1"; - bool so_epctx_embed_mode = config_options_.GetConfigOrDefault("ep.context_embed_mode", "1") == "1"; - std::string so_cache_path = config_options_.GetConfigOrDefault("ep.context_file_path", "").c_str(); + bool so_disable_cpu_fallback = config_options_.GetConfigOrDefault(kOrtSessionOptionsDisableCPUEPFallback, "0") == "1"; + bool so_export_ep_ctx_blob = config_options_.GetConfigOrDefault(kOrtSessionOptionEpContextEnable, "0") == "1"; + bool so_epctx_embed_mode = config_options_.GetConfigOrDefault(kOrtSessionOptionEpContextEmbedMode, "1") == "1"; + std::string so_cache_path = config_options_.GetConfigOrDefault(kOrtSessionOptionEpContextFilePath, "").c_str(); if (so_export_ep_ctx_blob && !so_cache_path.empty()) { cache_dir_ = so_cache_path;