Skip to content

Commit

Permalink
Openvino EP code changes for 1.17 update (#19023)
Browse files Browse the repository at this point in the history
### Description
Introduce the AppendExecutionProvider_OpenVINO_V2 API and add support for
OpenVINO (OV) 2023.3.


### Context

- The new API lets customers use the officially published Microsoft
onnxruntime libraries together with the OpenVINO EP (OVEP) libraries.
- Add support for the official OpenVINO 2023.3 release.
- Extend operator coverage.
- Fixes for issues reported on GitHub (GH).

---------

Co-authored-by: Suryaprakash Shanmugam <[email protected]>
  • Loading branch information
2 people authored and mszhanyi committed Jan 15, 2024
1 parent 927ffe4 commit 2a5f3f9
Show file tree
Hide file tree
Showing 22 changed files with 201 additions and 113 deletions.
14 changes: 7 additions & 7 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1296,21 +1296,21 @@ if (onnxruntime_USE_OPENVINO)
endif()

# Check OpenVINO version for support
if ($ENV{INTEL_OPENVINO_DIR} MATCHES "2022.3")
set(OPENVINO_VERSION "2022.3")
add_definitions(-DOPENVINO_2022_3=1)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.0")
if ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.0")
set(OPENVINO_VERSION "2023.0")
add_definitions(-DOPENVINO_2023_0=1)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.1")
set(OPENVINO_VERSION "2023.1")
add_definitions(-DOPENVINO_2023_1=1)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.2")
set(OPENVINO_VERSION "2023.2")
add_definitions(-DOPENVINO_2023_1=1)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "openvino")
set(OPENVINO_VERSION "2023.2")
add_definitions(-DOPENVINO_2023_2=1)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.3")
set(OPENVINO_VERSION "2023.3")
add_definitions(-DOPENVINO_2023_3=1)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "openvino")
set(OPENVINO_VERSION "2023.3")
add_definitions(-DOPENVINO_2023_3=1)
else()
message(FATAL_ERROR "Unsupported OpenVINO version: ${INTEL_OPENVINO_DIR}")
endif()
Expand Down
17 changes: 17 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -4541,6 +4541,23 @@ struct OrtApi {
* \since Version 1.17.
*/
ORT_API2_STATUS(KernelContext_ParallelFor, _In_ const OrtKernelContext* context, _In_ void (*fn)(void*, size_t), _In_ size_t total, _In_ size_t num_batch, _In_ void* usr_data);

/** \brief Append OpenVINO execution provider to the session options
*
* If OpenVINO is not available (due to a non OpenVINO enabled build, or if OpenVINO is not installed on the system), this function will fail.
*
* Provider options are supplied as two arrays of strings that each hold `num_keys` entries;
* the value at index i belongs to the key at index i.
*
* \param[in] options Session options the OpenVINO execution provider is appended to
* \param[in] provider_options_keys Array of `num_keys` provider option key strings
* \param[in] provider_options_values Array of `num_keys` provider option value strings
* \param[in] num_keys Number of entries in `provider_options_keys` and `provider_options_values`
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.17.
*/
ORT_API2_STATUS(SessionOptionsAppendExecutionProvider_OpenVINO_V2,
_In_ OrtSessionOptions* options,
_In_reads_(num_keys) const char* const* provider_options_keys,
_In_reads_(num_keys) const char* const* provider_options_values,
_In_ size_t num_keys);
};

/*
Expand Down
10 changes: 6 additions & 4 deletions include/onnxruntime/core/session/onnxruntime_cxx_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -874,10 +874,12 @@ struct SessionOptionsImpl : ConstSessionOptionsImpl<T> {
SessionOptionsImpl& AddInitializer(const char* name, const OrtValue* ort_val); ///< Wraps OrtApi::AddInitializer
SessionOptionsImpl& AddExternalInitializers(const std::vector<std::string>& names, const std::vector<Value>& ort_values); ///< Wraps OrtApi::AddExternalInitializers

SessionOptionsImpl& AppendExecutionProvider_CUDA(const OrtCUDAProviderOptions& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_CUDA
SessionOptionsImpl& AppendExecutionProvider_CUDA_V2(const OrtCUDAProviderOptionsV2& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_CUDA_V2
SessionOptionsImpl& AppendExecutionProvider_ROCM(const OrtROCMProviderOptions& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_ROCM
SessionOptionsImpl& AppendExecutionProvider_OpenVINO(const OrtOpenVINOProviderOptions& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_OpenVINO
SessionOptionsImpl& AppendExecutionProvider_CUDA(const OrtCUDAProviderOptions& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_CUDA
SessionOptionsImpl& AppendExecutionProvider_CUDA_V2(const OrtCUDAProviderOptionsV2& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_CUDA_V2
SessionOptionsImpl& AppendExecutionProvider_ROCM(const OrtROCMProviderOptions& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_ROCM
SessionOptionsImpl& AppendExecutionProvider_OpenVINO(const OrtOpenVINOProviderOptions& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_OpenVINO
///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_OpenVINO_V2
SessionOptionsImpl& AppendExecutionProvider_OpenVINO_V2(const std::unordered_map<std::string, std::string>& provider_options = {});
SessionOptionsImpl& AppendExecutionProvider_TensorRT(const OrtTensorRTProviderOptions& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_TensorRT
SessionOptionsImpl& AppendExecutionProvider_TensorRT_V2(const OrtTensorRTProviderOptionsV2& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_TensorRT
SessionOptionsImpl& AppendExecutionProvider_MIGraphX(const OrtMIGraphXProviderOptions& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_MIGraphX
Expand Down
20 changes: 20 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_cxx_inline.h
Original file line number Diff line number Diff line change
Expand Up @@ -865,6 +865,26 @@ inline SessionOptionsImpl<T>& SessionOptionsImpl<T>::AppendExecutionProvider_Ope
return *this;
}

/// Appends the OpenVINO execution provider to these session options, configured from a
/// string-to-string map of provider options (an empty map passes zero options).
///
/// The map is flattened into two index-aligned arrays of C strings and forwarded to
/// OrtApi::SessionOptionsAppendExecutionProvider_OpenVINO_V2. A failing status is handed
/// to ThrowOnError.
///
/// \param provider_options Provider option names mapped to their values.
/// \return Reference to this object so calls can be chained.
template <typename T>
inline SessionOptionsImpl<T>& SessionOptionsImpl<T>::AppendExecutionProvider_OpenVINO_V2(const std::unordered_map<std::string, std::string>& provider_options) {
  const auto option_count = provider_options.size();

  // Borrowed pointers into provider_options; the map is not modified before the API call
  // below, so the c_str() results stay valid for its duration.
  std::vector<const char*> option_keys;
  std::vector<const char*> option_values;
  option_keys.reserve(option_count);
  option_values.reserve(option_count);

  for (auto it = provider_options.begin(); it != provider_options.end(); ++it) {
    option_keys.push_back(it->first.c_str());
    option_values.push_back(it->second.c_str());
  }

  ThrowOnError(GetApi().SessionOptionsAppendExecutionProvider_OpenVINO_V2(
      this->p_, option_keys.data(), option_values.data(), option_count));

  return *this;
}

template <typename T>
inline SessionOptionsImpl<T>& SessionOptionsImpl<T>::RegisterCustomOpsLibrary(const ORTCHAR_T* library_name,
const CustomOpConfigs& custom_op_configs) {
Expand Down
17 changes: 5 additions & 12 deletions onnxruntime/core/providers/openvino/backend_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,23 +13,16 @@
namespace onnxruntime {
namespace openvino_ep {

static std::unique_ptr<GlobalContext> g_global_context;

GlobalContext& BackendManager::GetGlobalContext() {
// This is not thread safe to call for the first time,
// but it is first called on the main thread by the constructor so it is safe.
if (!g_global_context)
g_global_context = std::make_unique<GlobalContext>();
return *g_global_context;
}

void BackendManager::ReleaseGlobalContext() {
g_global_context.reset();
return global_context_;
}

BackendManager::BackendManager(const onnxruntime::Node& fused_node,
BackendManager::BackendManager(const GlobalContext& global_context,
const onnxruntime::Node& fused_node,
const onnxruntime::GraphViewer& subgraph,
const logging::Logger& logger) {
global_context_ = global_context;

auto prec_str = GetGlobalContext().precision_str;
if (prec_str == "FP32") {
subgraph_context_.precision = "FP32";
Expand Down
8 changes: 5 additions & 3 deletions onnxruntime/core/providers/openvino/backend_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,14 @@ namespace openvino_ep {
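// Manages the backends for one fused node. Despite the earlier "singleton" wording, an
// instance is constructed per fused node and keeps its own copy of the GlobalContext.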
class BackendManager {
public:
BackendManager(const onnxruntime::Node& fused_node,
BackendManager(const GlobalContext& global_context,
const onnxruntime::Node& fused_node,
const onnxruntime::GraphViewer& subgraph,
const logging::Logger& logger);
void Compute(OrtKernelContext* context);
void ShutdownBackendManager();
static GlobalContext& GetGlobalContext();
static void ReleaseGlobalContext();
void SetGlobalCotext(const GlobalContext& global_context);
GlobalContext& GetGlobalContext();

private:
std::unique_ptr<ONNX_NAMESPACE::ModelProto> GetModelProtoFromFusedNode(
Expand All @@ -45,6 +46,7 @@ class BackendManager {
std::shared_ptr<IBackend> concrete_backend_;
std::map<std::string, std::shared_ptr<IBackend>> backend_map_;
SubGraphContext subgraph_context_;
GlobalContext global_context_;
};

} // namespace openvino_ep
Expand Down
2 changes: 0 additions & 2 deletions onnxruntime/core/providers/openvino/backend_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -95,13 +95,11 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
}
}
#ifndef NDEBUG
#if defined(OPENVINO_2022_3) || (OPENVINO_2023_0) || (OPENVINO_2023_1) || (OPENVINO_2023_2)
if (IsDebugEnabled()) {
std::string name = cnn_network->get_friendly_name();
ov::pass::Serialize serializer(name + ".xml", name + ".bin");
serializer.run_on_model(cnn_network);
}
#endif
#endif
return cnn_network;
} catch (std::string const& msg) {
Expand Down
12 changes: 1 addition & 11 deletions onnxruntime/core/providers/openvino/backends/basic_backend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
}
#else
#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1) || (OPENVINO_2023_2)
if (global_context_.disable_dynamic_shapes && dev_prec != "CPU_FP16") {
const std::string model = model_proto.SerializeAsString();
exe_network_ = global_context_.ie_core.LoadNetwork(
Expand All @@ -82,12 +81,6 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
}
#else
ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_);
exe_network_ = global_context_.ie_core.LoadNetwork(
ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
#endif
#endif
} else {
ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_);
Expand Down Expand Up @@ -126,13 +119,11 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
device_config.emplace(ov::enable_profiling(true));
}
#endif
#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1) || (OPENVION_2023_2)
if (global_context_.device_type.find("NPU") != std::string::npos) {
std::pair<std::string, ov::Any> device_property;
device_property = std::make_pair("NPU_COMPILER_TYPE", "DRIVER");
device_config.emplace(ov::device::properties("NPU", device_property));
}
#endif
}

void BasicBackend::EnableCaching() {
Expand Down Expand Up @@ -463,8 +454,7 @@ void BasicBackend::Infer(OrtKernelContext* ctx) {

#ifdef IO_BUFFER_ENABLED
if ((global_context_.device_type.find("GPU") != std::string::npos) &&
(global_context_.context != nullptr) &&
(openvino_ep::BackendManager::GetGlobalContext().is_wholly_supported_graph)) {
(global_context_.context != nullptr) && global_context_.is_wholly_supported_graph) {
try {
StartRemoteAsyncInference(context, infer_request);
} catch (std::string const& msg) {
Expand Down
59 changes: 33 additions & 26 deletions onnxruntime/core/providers/openvino/openvino_execution_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include "openvino_execution_provider.h"
#include "contexts.h"
#include "backend_manager.h"
#include "ov_versions/capabilities.h"
#include "ov_versions/capability.h"

// Bounded memcpy helper: copies min(destsz, srcsz) bytes from src to dest, so the copy
// never exceeds the smaller of the two stated sizes.
// NOTE(review): std::min deduces a single type, so destsz and srcsz must have the same
// type at every call site — confirm.
#define MEMCPY_S(dest, src, destsz, srcsz) memcpy(dest, src, std::min(destsz, srcsz))

Expand All @@ -15,22 +15,23 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
: IExecutionProvider{onnxruntime::kOpenVINOExecutionProvider} {
InitProviderOrtApi();

openvino_ep::BackendManager::GetGlobalContext().device_type = info.device_type_;
openvino_ep::BackendManager::GetGlobalContext().precision_str = info.precision_;
openvino_ep::BackendManager::GetGlobalContext().enable_npu_fast_compile = info.enable_npu_fast_compile_;
openvino_ep::BackendManager::GetGlobalContext().cache_dir = info.cache_dir_;
openvino_ep::BackendManager::GetGlobalContext().num_streams = info.num_streams_;
openvino_ep::BackendManager::GetGlobalContext().context = info.context_;
openvino_ep::BackendManager::GetGlobalContext().enable_opencl_throttling = info.enable_opencl_throttling_;
openvino_ep::BackendManager::GetGlobalContext().disable_dynamic_shapes = info.disable_dynamic_shapes_;
openvino_ep::BackendManager::GetGlobalContext().num_of_threads = info.num_of_threads_;
global_context_ = std::make_unique<openvino_ep::GlobalContext>();
global_context_->device_type = info.device_type_;
global_context_->precision_str = info.precision_;
global_context_->enable_npu_fast_compile = info.enable_npu_fast_compile_;
global_context_->cache_dir = info.cache_dir_;
global_context_->num_streams = info.num_streams_;
global_context_->context = info.context_;
global_context_->enable_opencl_throttling = info.enable_opencl_throttling_;
global_context_->disable_dynamic_shapes = info.disable_dynamic_shapes_;
global_context_->num_of_threads = info.num_of_threads_;

// to check if target device is available
// using ie_core capability GetAvailableDevices to fetch list of devices plugged in
if (info.cache_dir_.empty()) {
bool device_found = false;
bool device_id_found = false;
auto available_devices = openvino_ep::BackendManager::GetGlobalContext().ie_core.GetAvailableDevices();
auto available_devices = global_context_->ie_core.GetAvailableDevices();
// Checking for device_type configuration
if (info.device_type_ != "") {
if (info.device_type_.find("HETERO") != std::string::npos ||
Expand Down Expand Up @@ -89,7 +90,7 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
}
}
}
openvino_ep::BackendManager::GetGlobalContext().device_id = info.device_id_;
global_context_->device_id = info.device_id_;
}

std::vector<std::unique_ptr<ComputeCapability>>
Expand All @@ -100,36 +101,42 @@ OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer,
if (!(GetEnvironmentVar("ORT_OPENVINO_ENABLE_CI_LOG").empty())) {
std::cout << "In the OpenVINO EP" << std::endl;
}
openvino_ep::BackendManager::GetGlobalContext().onnx_model_name = graph_viewer.Name();
global_context_->onnx_model_name = graph_viewer.Name();
#ifdef _WIN32
std::wstring onnx_path = graph_viewer.ModelPath().ToPathString();
openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name =
global_context_->onnx_model_path_name =
std::string(onnx_path.begin(), onnx_path.end());
#else
openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name =
global_context_->onnx_model_path_name =
graph_viewer.ModelPath().ToPathString();
#endif
openvino_ep::BackendManager::GetGlobalContext().onnx_opset_version =
global_context_->onnx_opset_version =
graph_viewer.DomainToVersionMap().at(kOnnxDomain);

#if defined(OPENVINO_2022_3)
#if defined(OPENVINO_2023_0)
openvino_ep::GetCapability obj(graph_viewer,
openvino_ep::BackendManager::GetGlobalContext().device_type, "V_2022_3");
result = obj.Execute();
#elif defined(OPENVINO_2023_0)
openvino_ep::GetCapability obj(graph_viewer,
openvino_ep::BackendManager::GetGlobalContext().device_type, "V_2023_0");
global_context_->device_type,
global_context_->precision_str, "V_2023_0");
result = obj.Execute();
#elif defined(OPENVINO_2023_1)
openvino_ep::GetCapability obj(graph_viewer,
openvino_ep::BackendManager::GetGlobalContext().device_type, "V_2023_1");
global_context_->device_type,
global_context_->precision_str, "V_2023_1");
result = obj.Execute();
#elif defined(OPENVINO_2023_2)
openvino_ep::GetCapability obj(graph_viewer,
openvino_ep::BackendManager::GetGlobalContext().device_type, "V_2023_2");
global_context_->device_type,
global_context_->precision_str, "V_2023_2");
result = obj.Execute();
#elif defined(OPENVINO_2023_3)
openvino_ep::GetCapability obj(graph_viewer,
global_context_->device_type,
global_context_->precision_str, "V_2023_3");
result = obj.Execute();
#endif

global_context_->is_wholly_supported_graph = obj.IsWhollySupportedGraph();

return result;
}

Expand All @@ -142,10 +149,10 @@ common::Status OpenVINOExecutionProvider::Compile(

NodeComputeInfo compute_info;

openvino_ep::BackendManager::GetGlobalContext().use_api_2 = true;
global_context_->use_api_2 = true;

std::shared_ptr<openvino_ep::BackendManager> backend_manager =
std::make_shared<openvino_ep::BackendManager>(fused_node, graph_body_viewer, *GetLogger());
std::make_shared<openvino_ep::BackendManager>(*global_context_, fused_node, graph_body_viewer, *GetLogger());

compute_info.create_state_func =
[backend_manager](ComputeContext* context, FunctionState* state) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,9 @@ class OpenVINOExecutionProvider : public IExecutionProvider {
// Execution-handle accessor override; this provider has no handle to expose and
// always returns nullptr.
const void* GetExecutionHandle() const noexcept override {
return nullptr;
}

private:
std::unique_ptr<openvino_ep::GlobalContext> global_context_;
};

} // namespace onnxruntime
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,6 @@ struct OpenVINO_Provider : Provider {
}

void Shutdown() override {
openvino_ep::BackendManager::ReleaseGlobalContext();
}
} g_provider;

Expand Down
2 changes: 0 additions & 2 deletions onnxruntime/core/providers/openvino/ov_interface.cc
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ OVExeNetwork OVCore::LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network,
}
}

#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1) || (OPENVINO_2023_2)
OVExeNetwork OVCore::LoadNetwork(const std::string& model,
std::string& hw_target,
ov::AnyMap& device_config,
Expand All @@ -103,7 +102,6 @@ OVExeNetwork OVCore::LoadNetwork(const std::string& model,
ORT_THROW(log_tag + " Exception while Loading Network for graph " + name);
}
}
#endif

void OVCore::SetCache(std::string cache_dir_path) {
oe.set_property(ov::cache_dir(cache_dir_path));
Expand Down
6 changes: 0 additions & 6 deletions onnxruntime/core/providers/openvino/ov_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,10 @@
#include <vector>
#include <memory>

#if defined(OPENVINO_2022_3) || (OPENVINO_2023_0) || (OPENVINO_2023_1) || (OPENVINO_2023_2)
#define OV_API_20
#include "openvino/openvino.hpp"
#include "openvino/pass/convert_fp32_to_fp16.hpp"
#include "openvino/frontend/manager.hpp"
#else
#include <inference_engine.hpp>
#endif

#ifdef IO_BUFFER_ENABLED
#include <gpu/gpu_context_api_ocl.hpp>
Expand Down Expand Up @@ -49,12 +45,10 @@ class OVCore {
std::string& hw_target,
ov::AnyMap& device_config,
std::string name);
#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1) || (OPENVINO_2023_2)
OVExeNetwork LoadNetwork(const std::string& model_stream,
std::string& hw_target,
ov::AnyMap& device_config,
std::string name);
#endif
void SetCache(std::string cache_dir_path);
#ifdef IO_BUFFER_ENABLED
OVExeNetwork LoadNetwork(std::shared_ptr<OVNetwork>& model, OVRemoteContextPtr context, std::string& name);
Expand Down
Loading

0 comments on commit 2a5f3f9

Please sign in to comment.