Skip to content

Commit

Permalink
OVEP Dynamic WorkloadType support (microsoft#22779)
Browse files Browse the repository at this point in the history
### Description
Add support for setting EP dynamic options in OVEP.

### Motivation and Context
Related to microsoft#22282

---------

Co-authored-by: Javier E. Martinez <[email protected]>
  • Loading branch information
preetha-intel and javier-intel authored Nov 10, 2024
1 parent 63cb532 commit c9ed016
Show file tree
Hide file tree
Showing 7 changed files with 55 additions and 9 deletions.
7 changes: 6 additions & 1 deletion onnxruntime/core/providers/openvino/backend_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ GlobalContext& BackendManager::GetGlobalContext() {
return global_context_;
}

// Returns a reference to the compiled model exposed by the concrete backend
// (forwards directly to concrete_backend_->GetOVCompiledModel()).
ov::CompiledModel& BackendManager::GetOVCompiledModel() {
  return concrete_backend_->GetOVCompiledModel();
}

BackendManager::BackendManager(const GlobalContext& global_context,
const onnxruntime::Node& fused_node,
const onnxruntime::GraphViewer& subgraph,
Expand All @@ -35,7 +40,7 @@ BackendManager::BackendManager(const GlobalContext& global_context,
openvino_sdk_version_ = std::to_string(global_context_.OpenVINO_Version.at(0)) + "." +
std::to_string(global_context_.OpenVINO_Version.at(1));
if (ep_ctx_handle_.CheckForOVEPCtxNode(subgraph, openvino_sdk_version_)) {
if (ep_ctx_handle_.ImportBlobFromEPCtxModel(subgraph) != Status::OK())
if (ep_ctx_handle_.ImportBlobFromEPCtxModel(subgraph, global_context_.ep_context_embed_mode) != Status::OK())
ORT_THROW("Import blob from model failed");
}

Expand Down
1 change: 1 addition & 0 deletions onnxruntime/core/providers/openvino/backend_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class BackendManager {
GlobalContext& GetGlobalContext();
Status ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphViewer& subgraph,
const logging::Logger& logger);
ov::CompiledModel& GetOVCompiledModel();

private:
std::unique_ptr<ONNX_NAMESPACE::ModelProto> GetModelProtoFromFusedNode(
Expand Down
3 changes: 2 additions & 1 deletion onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,11 +94,12 @@ Status EPCtxHandler::ExportEPCtxModel(const GraphViewer& graph_viewer,
return Status::OK();
}

Status EPCtxHandler::ImportBlobFromEPCtxModel(const GraphViewer& graph_viewer) {
Status EPCtxHandler::ImportBlobFromEPCtxModel(const GraphViewer& graph_viewer, bool& ep_context_embed_mode) {
auto node = graph_viewer.GetNode(0);
auto& attrs = node->GetAttributes();
ORT_ENFORCE(attrs.count(EP_CACHE_CONTEXT) > 0);
model_stream_ = std::make_shared<std::istringstream>(attrs.at(EP_CACHE_CONTEXT).s());
ep_context_embed_mode = static_cast<bool>(attrs.at(EMBED_MODE).i());
LOGS_DEFAULT(VERBOSE) << "[OpenVINO EP] Read blob from EPContext Node";

is_valid_ep_ctx_graph_ = true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class EPCtxHandler {
const bool& ep_context_embed_mode,
std::string&& model_blob_str,
const std::string& openvino_sdk_version) const;
Status ImportBlobFromEPCtxModel(const GraphViewer& graph_viewer);
Status ImportBlobFromEPCtxModel(const GraphViewer& graph_viewer, bool& ep_context_embed_mode);
bool CheckForOVEPCtxNode(const GraphViewer& graph_viewer, std::string openvino_sdk_version) const;
bool IsValidOVEPCtxGraph() const { return is_valid_ep_ctx_graph_; }
[[nodiscard]] const std::shared_ptr<std::istringstream> GetModelBlobStream() const { return model_stream_; }
Expand Down
38 changes: 36 additions & 2 deletions onnxruntime/core/providers/openvino/openvino_execution_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@
// Licensed under the MIT License
#include <filesystem>
#include <utility>

#include <string>
#include "core/providers/shared_library/provider_api.h"
#include "core/providers/openvino/openvino_execution_provider.h"
#include "core/providers/openvino/contexts.h"
#include "core/providers/openvino/backend_manager.h"
#include "core/providers/openvino/onnx_ctx_model_helper.h"
#include "core/providers/openvino/ov_versions/capability.h"
#include "core/session/onnxruntime_session_options_config_keys.h"
#include "openvino/core/version.hpp"
#ifdef USE_OVEP_NPU_MEMORY
#include "core/providers/openvino/ov_allocator.h"
Expand Down Expand Up @@ -150,7 +151,7 @@ common::Status OpenVINOExecutionProvider::Compile(
graph_body_viewer,
*GetLogger(),
ep_ctx_handle_);

backend_manager_ = backend_manager;
compute_info.create_state_func =
[backend_manager](ComputeContext* context, FunctionState* state) {
OpenVINOEPFunctionState* p = new OpenVINOEPFunctionState();
Expand Down Expand Up @@ -198,4 +199,37 @@ std::vector<AllocatorPtr> OpenVINOExecutionProvider::CreatePreferredAllocators()
}
#endif

// Applies runtime ("dynamic") EP options to the compiled OpenVINO model.
//
// `keys` and `values` are parallel arrays of C strings. The only key handled
// here is kOrtEpDynamicOptionsWorkloadType; its accepted values are
// "Efficient" and "Default", which are forwarded to the compiled model as the
// ov::workload_type property "EFFICIENT" / "DEFAULT". Unknown keys and
// unrecognized workload values are logged at WARNING level and ignored.
//
// Returns:
//   - INVALID_ARGUMENT if the two spans differ in size or contain a null entry.
//   - FAIL if a valid workload type is requested while backend_manager_ is
//     still unset.
//   - OK otherwise.
// Pairs are processed in order; pairs preceding a failing one have already
// been applied when an error is returned.
common::Status OpenVINOExecutionProvider::SetEpDynamicOptions(gsl::span<const char* const> keys,
                                                              gsl::span<const char* const> values) {
  // Ensure the number of keys and values match
  if (keys.size() != values.size()) {
    return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Mismatched keys and values sizes.");
  }

  for (size_t i = 0; i < keys.size(); ++i) {
    // Constructing std::string from a null pointer is undefined behavior.
    if (keys[i] == nullptr || values[i] == nullptr) {
      return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Null key or value in EP dynamic options.");
    }
    const std::string key = keys[i];
    const std::string value = values[i];

    if (key != kOrtEpDynamicOptionsWorkloadType) {
      // Handle unknown options
      LOGS_DEFAULT(WARNING) << "Unknown key/value pair - ignoring " << key << "/" << value;
      continue;
    }

    // Scoped to this iteration so that a previously accepted value cannot be
    // re-applied when a later pair carries an unrecognized workload type.
    std::string workload_type;
    if (value == "Efficient") {
      workload_type = "EFFICIENT";
    } else if (value == "Default") {
      workload_type = "DEFAULT";
    } else {
      LOGS_DEFAULT(WARNING) << "Unknown workload_type - ignoring " << key << "/" << value;
      LOGS_DEFAULT(WARNING) << "Supported types are 'Efficient' and 'Default' \n";
      continue;
    }

    // There is no compiled model to update until a backend manager exists.
    if (backend_manager_ == nullptr) {
      return Status(common::ONNXRUNTIME, common::FAIL,
                    "OpenVINO backend is not initialized; cannot set workload_type.");
    }

    LOGS_DEFAULT(INFO) << "SetEpDynamicOptions - modifying: " << key << "/" << value;
    ov::CompiledModel& ov_compiled_model = backend_manager_->GetOVCompiledModel();
    ov_compiled_model.set_property(ov::workload_type(workload_type));
  }
  return Status::OK();
}
} // namespace onnxruntime
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,9 @@ class OpenVINOExecutionProvider : public IExecutionProvider {
Status Compile(const std::vector<FusedNodeAndGraph>& fused_nodes,
std::vector<NodeComputeInfo>& node_compute_funcs) override;

Status SetEpDynamicOptions(gsl::span<const char* const> /*keys*/,
gsl::span<const char* const> /*values*/) override;

const void* GetExecutionHandle() const noexcept override {
return nullptr;
}
Expand All @@ -197,6 +200,7 @@ class OpenVINOExecutionProvider : public IExecutionProvider {
private:
std::unique_ptr<openvino_ep::GlobalContext> global_context_;
openvino_ep::EPCtxHandler ep_ctx_handle_{};
std::shared_ptr<openvino_ep::BackendManager> backend_manager_;
};

} // namespace onnxruntime
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "core/providers/openvino/openvino_provider_factory.h"
#include "core/providers/openvino/openvino_execution_provider.h"
#include "core/providers/openvino/openvino_provider_factory_creator.h"
#include "core/session/onnxruntime_session_options_config_keys.h"
#include "nlohmann/json.hpp"

namespace onnxruntime {
Expand Down Expand Up @@ -50,10 +51,10 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
};

std::unique_ptr<IExecutionProvider> OpenVINOProviderFactory::CreateProvider() {
bool so_disable_cpu_fallback = config_options_.GetConfigOrDefault("session.disable_cpu_ep_fallback", "0") == "1";
bool so_export_ep_ctx_blob = config_options_.GetConfigOrDefault("ep.context_enable", "0") == "1";
bool so_epctx_embed_mode = config_options_.GetConfigOrDefault("ep.context_embed_mode", "1") == "1";
std::string so_cache_path = config_options_.GetConfigOrDefault("ep.context_file_path", "").c_str();
bool so_disable_cpu_fallback = config_options_.GetConfigOrDefault(kOrtSessionOptionsDisableCPUEPFallback, "0") == "1";
bool so_export_ep_ctx_blob = config_options_.GetConfigOrDefault(kOrtSessionOptionEpContextEnable, "0") == "1";
bool so_epctx_embed_mode = config_options_.GetConfigOrDefault(kOrtSessionOptionEpContextEmbedMode, "1") == "1";
std::string so_cache_path = config_options_.GetConfigOrDefault(kOrtSessionOptionEpContextFilePath, "").c_str();

if (so_export_ep_ctx_blob && !so_cache_path.empty()) {
cache_dir_ = so_cache_path;
Expand Down

0 comments on commit c9ed016

Please sign in to comment.