Skip to content

Commit

Permalink
OpenVINO EP Rel 1.18 Changes (#20337)
Browse files Browse the repository at this point in the history
### Description
These changes include
Support for OpenVINO 2024.1
Import PreCompiled Blobs with EPContext Blob 
Separate Device/Precision as input
Deprecate the CPU_FP32 and GPU_FP32 terminology; introduce CPU and GPU
AUTO with GPU,CPU will only create a GPU blob, not a CPU blob.



### Motivation and Context
- OpenVINO 2024.1 will be out soon
- Importing a precompiled blob can greatly reduce FEIL/FIL time.
- Separating Device/Precision will make the input cleaner
-

---------

Co-authored-by: Suryaprakash Shanmugam <[email protected]>
Co-authored-by: Preetha Veeramalai <[email protected]>
  • Loading branch information
3 people authored Apr 19, 2024
1 parent 9001c69 commit 4d1963c
Show file tree
Hide file tree
Showing 42 changed files with 827 additions and 710 deletions.
34 changes: 8 additions & 26 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1325,43 +1325,25 @@ if (onnxruntime_USE_OPENVINO)

add_definitions(-DUSE_OPENVINO=1)

if (onnxruntime_USE_OPENVINO_GPU_FP32)
add_definitions(-DOPENVINO_CONFIG_GPU_FP32=1)
if (onnxruntime_USE_OPENVINO_GPU)
add_definitions(-DOPENVINO_CONFIG_GPU=1)
endif()

if (onnxruntime_USE_OPENVINO_GPU_FP16)
add_definitions(-DOPENVINO_CONFIG_GPU_FP16=1)
endif()

if (onnxruntime_USE_OPENVINO_CPU_FP32)
add_definitions(-DOPENVINO_CONFIG_CPU_FP32=1)
endif()

if (onnxruntime_USE_OPENVINO_CPU_FP16)
add_definitions(-DOPENVINO_CONFIG_CPU_FP16=1)
if (onnxruntime_USE_OPENVINO_CPU)
add_definitions(-DOPENVINO_CONFIG_CPU=1)
endif()

if (onnxruntime_USE_OPENVINO_NPU)
add_definitions(-DOPENVINO_CONFIG_NPU=1)
endif()

if (onnxruntime_USE_OPENVINO_GPU_FP32_NP)
add_definitions(-DOPENVINO_CONFIG_GPU_FP32=1)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
endif()

if (onnxruntime_USE_OPENVINO_GPU_FP16_NP)
add_definitions(-DOPENVINO_CONFIG_GPU_FP16=1)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
endif()

if (onnxruntime_USE_OPENVINO_CPU_FP32_NP)
add_definitions(-DOPENVINO_CONFIG_CPU_FP32=1)
if (onnxruntime_USE_OPENVINO_GPU_NP)
add_definitions(-DOPENVINO_CONFIG_GPU=1)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
endif()

if (onnxruntime_USE_OPENVINO_CPU_FP16_NP)
add_definitions(-DOPENVINO_CONFIG_CPU_FP16=1)
if (onnxruntime_USE_OPENVINO_CPU_NP)
add_definitions(-DOPENVINO_CONFIG_CPU=1)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
endif()

Expand Down
2 changes: 1 addition & 1 deletion dockerfiles/Dockerfile.openvino
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ ENV WORKDIR_PATH=/home/openvino
WORKDIR $WORKDIR_PATH
ENV DEBIAN_FRONTEND noninteractive

ARG DEVICE=CPU_FP32
ARG DEVICE=CPU
ARG ONNXRUNTIME_REPO=https://github.com/microsoft/onnxruntime.git
ARG ONNXRUNTIME_BRANCH=main

Expand Down
77 changes: 65 additions & 12 deletions onnxruntime/core/providers/openvino/backend_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
// Licensed under the MIT License

#include <fstream>
#include <sstream>
#include <utility>
#include <exception>

#include "core/providers/shared_library/provider_api.h"
#include "contexts.h"
#include "backend_manager.h"
#include "ibackend.h"
#include "backend_utils.h"
#include "core/providers/openvino/contexts.h"
#include "core/providers/openvino/backend_manager.h"
#include "core/providers/openvino/ibackend.h"
#include "core/providers/openvino/backend_utils.h"

namespace onnxruntime {
namespace openvino_ep {
Expand All @@ -21,8 +21,17 @@ GlobalContext& BackendManager::GetGlobalContext() {
BackendManager::BackendManager(const GlobalContext& global_context,
const onnxruntime::Node& fused_node,
const onnxruntime::GraphViewer& subgraph,
const logging::Logger& logger) {
const logging::Logger& logger,
EPCtxHandler& ctx_handle) {
global_context_ = global_context;
ep_ctx_handle_ = ctx_handle;

openvino_sdk_version_ = std::to_string(global_context_.OpenVINO_Version.at(0)) + "." +
std::to_string(global_context_.OpenVINO_Version.at(1));
if (ep_ctx_handle_.CheckForOVEPCtxNode(subgraph, openvino_sdk_version_)) {
if (ep_ctx_handle_.ImportBlobFromEPCtxModel(subgraph) != Status::OK())
ORT_THROW("Import blob from model failed");
}

auto prec_str = GetGlobalContext().precision_str;

Expand Down Expand Up @@ -66,7 +75,8 @@ BackendManager::BackendManager(const GlobalContext& global_context,
try {
concrete_backend_ = BackendFactory::MakeBackend(*model_proto_,
GetGlobalContext(),
subgraph_context_);
subgraph_context_,
ep_ctx_handle_);
} catch (std::string const& msg) {
ORT_THROW(msg);
}
Expand All @@ -85,7 +95,8 @@ BackendManager::BackendManager(const GlobalContext& global_context,
try {
concrete_backend_ = BackendFactory::MakeBackend(*model_proto_,
GetGlobalContext(),
subgraph_context_);
subgraph_context_,
ep_ctx_handle_);
} catch (const OnnxRuntimeException& ex) {
if (device_type.find("NPU") != std::string::npos) {
LOGS_DEFAULT(WARNING) << ex.what();
Expand All @@ -96,7 +107,8 @@ BackendManager::BackendManager(const GlobalContext& global_context,
try {
concrete_backend_ = BackendFactory::MakeBackend(*model_proto_,
GetGlobalContext(),
subgraph_context_);
subgraph_context_,
ep_ctx_handle_);
} catch (std::string const& msg) {
ORT_THROW(msg);
}
Expand All @@ -107,6 +119,45 @@ BackendManager::BackendManager(const GlobalContext& global_context,
}
}

// Exports the compiled blob as an EPContext node when the provider option
// for exporting a precompiled blob is set. In embed mode (the default) the
// serialized blob stream is stored as data inside the EPContext node;
// otherwise the blob is dumped to "<model>.blob" on disk and only the path
// to that file is recorded in the node.
Status BackendManager::ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphViewer& graph_body_viewer,
                                                     const logging::Logger& logger) {
  auto compiled_model = concrete_backend_->GetOVCompiledModel();

  // Strip the extension from the source model path so a suffix can be
  // appended to form the complete name of the output graph.
  std::string graph_name = global_context_.onnx_model_path_name;
  const size_t ext_pos = graph_name.find_last_of(".");
  if (ext_pos != std::string::npos) {
    graph_name = graph_name.substr(0, ext_pos);
  }

  // Embed mode: pass on the serialized blob itself.
  // Non-embed mode: write the blob to disk and pass on only its path.
  std::string model_blob_str;
  if (global_context_.ep_context_embed_mode) {
    std::ostringstream blob_stream;
    compiled_model.export_model(blob_stream);
    model_blob_str = blob_stream.str();
    ORT_ENFORCE(model_blob_str.size() != 0);
  } else {
    const std::string blob_path = graph_name + ".blob";
    std::ofstream blob_file(blob_path, std::ios::out | std::ios::trunc | std::ios::binary);
    compiled_model.export_model(blob_file);
    model_blob_str = blob_path;
  }

  ORT_RETURN_IF_ERROR(ep_ctx_handle_.ExportEPCtxModel(graph_body_viewer,
                                                      graph_name,
                                                      logger,
                                                      global_context_.ep_context_embed_mode,
                                                      model_blob_str,
                                                      openvino_sdk_version_,
                                                      GetGlobalContext().device_type));

  return Status::OK();
}

bool BackendManager::ModelHasBatchedInputs(const ONNX_NAMESPACE::ModelProto& model_proto) const {
bool has_batched_inputs = true;

Expand Down Expand Up @@ -182,7 +233,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
return model_proto;
}

std::vector<std::vector<int64_t>> GetInputTensorShapes(Ort::KernelContext& context) {
std::vector<std::vector<int64_t>> GetInputTensorShapes(const Ort::KernelContext& context) {
const auto input_count = context.GetInputCount();
std::vector<std::vector<int64_t>> input_shapes;
input_shapes.reserve(input_count);
Expand Down Expand Up @@ -289,7 +340,8 @@ void BackendManager::Compute(OrtKernelContext* context) {
try {
dynamic_backend = BackendFactory::MakeBackend(*modelproto_with_concrete_shapes,
GetGlobalContext(),
subgraph_context_);
subgraph_context_,
ep_ctx_handle_);
} catch (const OnnxRuntimeException& ex) {
if (GetGlobalContext().device_type.find("NPU") != std::string::npos) {
LOGS_DEFAULT(WARNING) << ex.what();
Expand All @@ -301,7 +353,8 @@ void BackendManager::Compute(OrtKernelContext* context) {
try {
dynamic_backend = BackendFactory::MakeBackend(*modelproto_with_concrete_shapes,
GetGlobalContext(),
subgraph_context_);
subgraph_context_,
ep_ctx_handle_);
} catch (std::string const& msg) {
ORT_THROW(msg);
}
Expand Down
14 changes: 10 additions & 4 deletions onnxruntime/core/providers/openvino/backend_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
#include <memory>
#include <string>

#include "ov_interface.h"
#include "contexts.h"
#include "ibackend.h"
#include "core/providers/openvino/ov_interface.h"
#include "core/providers/openvino/contexts.h"
#include "core/providers/openvino/onnx_ctx_model_helper.h"
#include "core/providers/openvino/ibackend.h"

namespace onnxruntime {
namespace openvino_ep {
Expand All @@ -21,11 +22,14 @@ class BackendManager {
BackendManager(const GlobalContext& global_context,
const onnxruntime::Node& fused_node,
const onnxruntime::GraphViewer& subgraph,
const logging::Logger& logger);
const logging::Logger& logger,
EPCtxHandler& ctx_handle);
void Compute(OrtKernelContext* context);
void ShutdownBackendManager();
void SetGlobalCotext(const GlobalContext& global_context);
GlobalContext& GetGlobalContext();
Status ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphViewer& subgraph,
const logging::Logger& logger);

private:
std::unique_ptr<ONNX_NAMESPACE::ModelProto> GetModelProtoFromFusedNode(
Expand All @@ -47,6 +51,8 @@ class BackendManager {
std::map<std::string, std::shared_ptr<IBackend>> backend_map_;
SubGraphContext subgraph_context_;
GlobalContext global_context_;
EPCtxHandler ep_ctx_handle_{};
std::string openvino_sdk_version_{};
};

} // namespace openvino_ep
Expand Down
4 changes: 2 additions & 2 deletions onnxruntime/core/providers/openvino/backend_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
#include <sstream>
#include <fstream>

#include "ov_interface.h"
#include "openvino/pass/convert_fp32_to_fp16.hpp"
#include "openvino/pass/constant_folding.hpp"
#include "core/providers/shared_library/provider_api.h"
#include "backend_utils.h"
#include "core/providers/openvino/backend_utils.h"
#include "core/providers/openvino/ov_interface.h"

using Exception = ov::Exception;

Expand Down
4 changes: 2 additions & 2 deletions onnxruntime/core/providers/openvino/backend_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
#include <string>

#include "core/session/onnxruntime_cxx_api.h"
#include "contexts.h"
#include "ov_interface.h"
#include "core/providers/openvino/contexts.h"
#include "core/providers/openvino/ov_interface.h"
#ifdef _WIN32
#include <direct.h>
#define GetCurrentDir _getcwd
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,16 @@
#include "core/providers/shared_library/provider_api.h"
#include "core/providers/openvino/contexts.h"
#include "core/providers/openvino/ibackend.h"
#include "basic_backend.h"
#include "core/providers/openvino/backends/basic_backend.h"

namespace onnxruntime {
namespace openvino_ep {

std::shared_ptr<IBackend>
BackendFactory::MakeBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
GlobalContext& global_context,
const SubGraphContext& subgraph_context) {
const SubGraphContext& subgraph_context,
EPCtxHandler& ep_ctx_handle) {
std::string type = global_context.device_type;
if (type == "CPU" || type.find("GPU") != std::string::npos ||
type.find("NPU") != std::string::npos ||
Expand All @@ -22,7 +23,7 @@ BackendFactory::MakeBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
type.find("AUTO") != std::string::npos) {
std::shared_ptr<IBackend> concrete_backend_;
try {
concrete_backend_ = std::make_shared<BasicBackend>(model_proto, global_context, subgraph_context);
concrete_backend_ = std::make_shared<BasicBackend>(model_proto, global_context, subgraph_context, ep_ctx_handle);
} catch (std::string const& msg) {
ORT_THROW(msg);
}
Expand Down
Loading

0 comments on commit 4d1963c

Please sign in to comment.