From 2a5f3f9751cb6366434897dfd59a0cccf49aba6f Mon Sep 17 00:00:00 2001 From: Preetha Veeramalai Date: Sat, 13 Jan 2024 02:50:51 +0530 Subject: [PATCH] Openvino EP code changes for 1.17 update (#19023) ### Description Introduce AppendExecutionProvider_OpenVINO_V2 API and support for OV 2023.3. ### Context - The API is added to facilitate customers in using published official Microsoft onnxruntime libraries with OVEP libraries. - Add support for OpenVINO 2023.3 official release. - Extend operator coverage - GH fixes --------- Co-authored-by: Suryaprakash Shanmugam --- cmake/CMakeLists.txt | 14 ++--- .../core/session/onnxruntime_c_api.h | 17 ++++++ .../core/session/onnxruntime_cxx_api.h | 10 ++-- .../core/session/onnxruntime_cxx_inline.h | 20 +++++++ .../providers/openvino/backend_manager.cc | 17 ++---- .../core/providers/openvino/backend_manager.h | 8 ++- .../core/providers/openvino/backend_utils.cc | 2 - .../openvino/backends/basic_backend.cc | 12 +--- .../openvino/openvino_execution_provider.cc | 59 +++++++++++-------- .../openvino/openvino_execution_provider.h | 3 + .../openvino/openvino_provider_factory.cc | 1 - .../core/providers/openvino/ov_interface.cc | 2 - .../core/providers/openvino/ov_interface.h | 6 -- .../openvino/ov_versions/capability.cc | 24 ++++---- .../{capabilities.h => capability.h} | 10 +++- .../openvino/ov_versions/data_ops.cc | 24 ++++---- .../providers/openvino/ov_versions/data_ops.h | 8 ++- onnxruntime/core/session/onnxruntime_c_api.cc | 1 + onnxruntime/core/session/ort_apis.h | 5 ++ .../core/session/provider_bridge_ort.cc | 57 ++++++++++++++---- .../core/session/provider_registration.cc | 12 ++++ onnxruntime/test/perftest/ort_test_session.cc | 2 +- 22 files changed, 201 insertions(+), 113 deletions(-) rename onnxruntime/core/providers/openvino/ov_versions/{capabilities.h => capability.h} (57%) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 1567da90cacfc..bc96218dac79e 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -1296,21 +1296,21 @@ if (onnxruntime_USE_OPENVINO) endif() # Check OpenVINO version for support - if ($ENV{INTEL_OPENVINO_DIR} MATCHES "2022.3") - set(OPENVINO_VERSION "2022.3") - add_definitions(-DOPENVINO_2022_3=1) - elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.0") + if ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.0") set(OPENVINO_VERSION "2023.0") add_definitions(-DOPENVINO_2023_0=1) elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.1") set(OPENVINO_VERSION "2023.1") add_definitions(-DOPENVINO_2023_1=1) elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.2") - set(OPENVINO_VERSION "2023.2") - add_definitions(-DOPENVINO_2023_1=1) - elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "openvino") set(OPENVINO_VERSION "2023.2") add_definitions(-DOPENVINO_2023_2=1) + elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.3") + set(OPENVINO_VERSION "2023.3") + add_definitions(-DOPENVINO_2023_3=1) + elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "openvino") + set(OPENVINO_VERSION "2023.3") + add_definitions(-DOPENVINO_2023_3=1) else() message(FATAL_ERROR "Unsupported OpenVINO version: ${INTEL_OPENVINO_DIR}") endif() diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 504f1db7b4420..3e69923330d6b 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -4541,6 +4541,23 @@ struct OrtApi { * \since Version 1.17. */ ORT_API2_STATUS(KernelContext_ParallelFor, _In_ const OrtKernelContext* context, _In_ void (*fn)(void*, size_t), _In_ size_t total, _In_ size_t num_batch, _In_ void* usr_data); + + /** \brief Append OpenVINO execution provider to the session options + * + * If OpenVINO is not available (due to a non OpenVINO enabled build, or if OpenVINO is not installed on the system), this function will fail. + * + * \param[in] options + * \param[in] provider_options_keys + * \param[in] provider_options_values + * \param[in] num_keys + * + * \snippet{doc} snippets.dox OrtStatus Return Value + */ + ORT_API2_STATUS(SessionOptionsAppendExecutionProvider_OpenVINO_V2, + _In_ OrtSessionOptions* options, + _In_reads_(num_keys) const char* const* provider_options_keys, + _In_reads_(num_keys) const char* const* provider_options_values, + _In_ size_t num_keys); }; /* diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index 3773a01cb65a8..7a553f9f94006 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -874,10 +874,12 @@ struct SessionOptionsImpl : ConstSessionOptionsImpl { SessionOptionsImpl& AddInitializer(const char* name, const OrtValue* ort_val); ///< Wraps OrtApi::AddInitializer SessionOptionsImpl& AddExternalInitializers(const std::vector& names, const std::vector& ort_values); ///< Wraps OrtApi::AddExternalInitializers - SessionOptionsImpl& AppendExecutionProvider_CUDA(const OrtCUDAProviderOptions& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_CUDA - SessionOptionsImpl& AppendExecutionProvider_CUDA_V2(const OrtCUDAProviderOptionsV2& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_CUDA_V2 - SessionOptionsImpl& AppendExecutionProvider_ROCM(const OrtROCMProviderOptions& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_ROCM - SessionOptionsImpl& AppendExecutionProvider_OpenVINO(const OrtOpenVINOProviderOptions& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_OpenVINO + SessionOptionsImpl& AppendExecutionProvider_CUDA(const OrtCUDAProviderOptions& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_CUDA + SessionOptionsImpl& AppendExecutionProvider_CUDA_V2(const OrtCUDAProviderOptionsV2& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_CUDA_V2 + SessionOptionsImpl& AppendExecutionProvider_ROCM(const OrtROCMProviderOptions& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_ROCM + SessionOptionsImpl& AppendExecutionProvider_OpenVINO(const OrtOpenVINOProviderOptions& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_OpenVINO + ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_OpenVINO_V2 + SessionOptionsImpl& AppendExecutionProvider_OpenVINO_V2(const std::unordered_map& provider_options = {}); SessionOptionsImpl& AppendExecutionProvider_TensorRT(const OrtTensorRTProviderOptions& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_TensorRT SessionOptionsImpl& AppendExecutionProvider_TensorRT_V2(const OrtTensorRTProviderOptionsV2& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_TensorRT SessionOptionsImpl& AppendExecutionProvider_MIGraphX(const OrtMIGraphXProviderOptions& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_MIGraphX diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h index db4619eeeae62..957e849cf5d4d 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h @@ -865,6 +865,26 @@ inline SessionOptionsImpl& SessionOptionsImpl::AppendExecutionProvider_Ope return *this; } +template +inline SessionOptionsImpl& SessionOptionsImpl::AppendExecutionProvider_OpenVINO_V2(const std::unordered_map& provider_options) { + auto num_entries = provider_options.size(); + std::vector keys, values; + if (num_entries > 0) { + keys.reserve(num_entries); + values.reserve(num_entries); + + for (const auto& entry : provider_options) { + keys.push_back(entry.first.c_str()); + values.push_back(entry.second.c_str()); + } + } + + ThrowOnError(GetApi().SessionOptionsAppendExecutionProvider_OpenVINO_V2(this->p_, + keys.data(), values.data(), num_entries)); + + return *this; +} + template inline SessionOptionsImpl& SessionOptionsImpl::RegisterCustomOpsLibrary(const ORTCHAR_T* library_name, const CustomOpConfigs& custom_op_configs) { diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index b2a7028f49e55..330b464ffd1bb 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -13,23 +13,16 @@ namespace onnxruntime { namespace openvino_ep { -static std::unique_ptr g_global_context; - GlobalContext& BackendManager::GetGlobalContext() { - // This is not thread safe to call for the first time, - // but it is first called on the main thread by the constructor so it is safe. - if (!g_global_context) - g_global_context = std::make_unique(); - return *g_global_context; -} - -void BackendManager::ReleaseGlobalContext() { - g_global_context.reset(); + return global_context_; } -BackendManager::BackendManager(const onnxruntime::Node& fused_node, +BackendManager::BackendManager(const GlobalContext& global_context, + const onnxruntime::Node& fused_node, const onnxruntime::GraphViewer& subgraph, const logging::Logger& logger) { + global_context_ = global_context; + auto prec_str = GetGlobalContext().precision_str; if (prec_str == "FP32") { subgraph_context_.precision = "FP32"; diff --git a/onnxruntime/core/providers/openvino/backend_manager.h b/onnxruntime/core/providers/openvino/backend_manager.h index a177324b23f7d..59bda7ca640ee 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.h +++ b/onnxruntime/core/providers/openvino/backend_manager.h @@ -18,13 +18,14 @@ namespace openvino_ep { // Singleton class that manages all the backends class BackendManager { public: - BackendManager(const onnxruntime::Node& fused_node, + BackendManager(const GlobalContext& global_context, + const onnxruntime::Node& fused_node, const onnxruntime::GraphViewer& subgraph, const logging::Logger& logger); void Compute(OrtKernelContext* context); void ShutdownBackendManager(); - static GlobalContext& GetGlobalContext(); - static void ReleaseGlobalContext(); + void SetGlobalCotext(const GlobalContext& global_context); + GlobalContext& GetGlobalContext(); private: std::unique_ptr GetModelProtoFromFusedNode( @@ -45,6 +46,7 @@ class BackendManager { std::shared_ptr concrete_backend_; std::map> backend_map_; SubGraphContext subgraph_context_; + GlobalContext global_context_; }; } // namespace openvino_ep diff --git a/onnxruntime/core/providers/openvino/backend_utils.cc b/onnxruntime/core/providers/openvino/backend_utils.cc index 5092fffcfc111..50c839017df2a 100644 --- a/onnxruntime/core/providers/openvino/backend_utils.cc +++ b/onnxruntime/core/providers/openvino/backend_utils.cc @@ -95,13 +95,11 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext } } #ifndef NDEBUG -#if defined(OPENVINO_2022_3) || (OPENVINO_2023_0) || (OPENVINO_2023_1) || (OPENVINO_2023_2) if (IsDebugEnabled()) { std::string name = cnn_network->get_friendly_name(); ov::pass::Serialize serializer(name + ".xml", name + ".bin"); serializer.run_on_model(cnn_network); } -#endif #endif return cnn_network; } catch (std::string const& msg) { diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc index 2280d853e30f4..e6c093d584031 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc @@ -70,7 +70,6 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto, LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin"; } #else -#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1) || (OPENVINO_2023_2) if (global_context_.disable_dynamic_shapes && dev_prec != "CPU_FP16") { const std::string model = model_proto.SerializeAsString(); exe_network_ = global_context_.ie_core.LoadNetwork( @@ -82,12 +81,6 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto, ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name); LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin"; } -#else - ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_); - exe_network_ = global_context_.ie_core.LoadNetwork( - ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name); - LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin"; -#endif #endif } else { ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_); @@ -126,13 +119,11 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) { device_config.emplace(ov::enable_profiling(true)); } #endif -#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1) || (OPENVION_2023_2) if (global_context_.device_type.find("NPU") != std::string::npos) { std::pair device_property; device_property = std::make_pair("NPU_COMPILER_TYPE", "DRIVER"); device_config.emplace(ov::device::properties("NPU", device_property)); } -#endif } void BasicBackend::EnableCaching() { @@ -463,8 +454,7 @@ void BasicBackend::Infer(OrtKernelContext* ctx) { #ifdef IO_BUFFER_ENABLED if ((global_context_.device_type.find("GPU") != std::string::npos) && - (global_context_.context != nullptr) && - (openvino_ep::BackendManager::GetGlobalContext().is_wholly_supported_graph)) { + (global_context_.context != nullptr) && global_context_.is_wholly_supported_graph) { try { StartRemoteAsyncInference(context, infer_request); } catch (std::string const& msg) { diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc index aa389f6297d80..e3948cc94b348 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc @@ -5,7 +5,7 @@ #include "openvino_execution_provider.h" #include "contexts.h" #include "backend_manager.h" -#include "ov_versions/capabilities.h" +#include "ov_versions/capability.h" #define MEMCPY_S(dest, src, destsz, srcsz) memcpy(dest, src, std::min(destsz, srcsz)) @@ -15,22 +15,23 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv : IExecutionProvider{onnxruntime::kOpenVINOExecutionProvider} { InitProviderOrtApi(); - openvino_ep::BackendManager::GetGlobalContext().device_type = info.device_type_; - openvino_ep::BackendManager::GetGlobalContext().precision_str = info.precision_; - openvino_ep::BackendManager::GetGlobalContext().enable_npu_fast_compile = info.enable_npu_fast_compile_; - openvino_ep::BackendManager::GetGlobalContext().cache_dir = info.cache_dir_; - openvino_ep::BackendManager::GetGlobalContext().num_streams = info.num_streams_; - openvino_ep::BackendManager::GetGlobalContext().context = info.context_; - openvino_ep::BackendManager::GetGlobalContext().enable_opencl_throttling = info.enable_opencl_throttling_; - openvino_ep::BackendManager::GetGlobalContext().disable_dynamic_shapes = info.disable_dynamic_shapes_; - openvino_ep::BackendManager::GetGlobalContext().num_of_threads = info.num_of_threads_; + global_context_ = std::make_unique(); + global_context_->device_type = info.device_type_; + global_context_->precision_str = info.precision_; + global_context_->enable_npu_fast_compile = info.enable_npu_fast_compile_; + global_context_->cache_dir = info.cache_dir_; + global_context_->num_streams = info.num_streams_; + global_context_->context = info.context_; + global_context_->enable_opencl_throttling = info.enable_opencl_throttling_; + global_context_->disable_dynamic_shapes = info.disable_dynamic_shapes_; + global_context_->num_of_threads = info.num_of_threads_; // to check if target device is available // using ie_core capability GetAvailableDevices to fetch list of devices plugged in if (info.cache_dir_.empty()) { bool device_found = false; bool device_id_found = false; - auto available_devices = openvino_ep::BackendManager::GetGlobalContext().ie_core.GetAvailableDevices(); + auto available_devices = global_context_->ie_core.GetAvailableDevices(); // Checking for device_type configuration if (info.device_type_ != "") { if (info.device_type_.find("HETERO") != std::string::npos || @@ -89,7 +90,7 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv } } } - openvino_ep::BackendManager::GetGlobalContext().device_id = info.device_id_; + global_context_->device_id = info.device_id_; } std::vector> @@ -100,36 +101,42 @@ OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer, if (!(GetEnvironmentVar("ORT_OPENVINO_ENABLE_CI_LOG").empty())) { std::cout << "In the OpenVINO EP" << std::endl; } - openvino_ep::BackendManager::GetGlobalContext().onnx_model_name = graph_viewer.Name(); + global_context_->onnx_model_name = graph_viewer.Name(); #ifdef _WIN32 std::wstring onnx_path = graph_viewer.ModelPath().ToPathString(); - openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name = + global_context_->onnx_model_path_name = std::string(onnx_path.begin(), onnx_path.end()); #else - openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name = + global_context_->onnx_model_path_name = graph_viewer.ModelPath().ToPathString(); #endif - openvino_ep::BackendManager::GetGlobalContext().onnx_opset_version = + global_context_->onnx_opset_version = graph_viewer.DomainToVersionMap().at(kOnnxDomain); -#if defined(OPENVINO_2022_3) +#if defined(OPENVINO_2023_0) openvino_ep::GetCapability obj(graph_viewer, - openvino_ep::BackendManager::GetGlobalContext().device_type, "V_2022_3"); - result = obj.Execute(); -#elif defined(OPENVINO_2023_0) - openvino_ep::GetCapability obj(graph_viewer, - openvino_ep::BackendManager::GetGlobalContext().device_type, "V_2023_0"); + global_context_->device_type, + global_context_->precision_str, "V_2023_0"); result = obj.Execute(); #elif defined(OPENVINO_2023_1) openvino_ep::GetCapability obj(graph_viewer, - openvino_ep::BackendManager::GetGlobalContext().device_type, "V_2023_1"); + global_context_->device_type, + global_context_->precision_str, "V_2023_1"); result = obj.Execute(); #elif defined(OPENVINO_2023_2) openvino_ep::GetCapability obj(graph_viewer, - openvino_ep::BackendManager::GetGlobalContext().device_type, "V_2023_2"); + global_context_->device_type, + global_context_->precision_str, "V_2023_2"); + result = obj.Execute(); +#elif defined(OPENVINO_2023_3) + openvino_ep::GetCapability obj(graph_viewer, + global_context_->device_type, + global_context_->precision_str, "V_2023_3"); result = obj.Execute(); #endif + global_context_->is_wholly_supported_graph = obj.IsWhollySupportedGraph(); + return result; } @@ -142,10 +149,10 @@ common::Status OpenVINOExecutionProvider::Compile( NodeComputeInfo compute_info; - openvino_ep::BackendManager::GetGlobalContext().use_api_2 = true; + global_context_->use_api_2 = true; std::shared_ptr backend_manager = - std::make_shared(fused_node, graph_body_viewer, *GetLogger()); + std::make_shared(*global_context_, fused_node, graph_body_viewer, *GetLogger()); compute_info.create_state_func = [backend_manager](ComputeContext* context, FunctionState* state) { diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.h b/onnxruntime/core/providers/openvino/openvino_execution_provider.h index 7cc2fb9b1ea98..b0c92828d8a38 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.h +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.h @@ -193,6 +193,9 @@ class OpenVINOExecutionProvider : public IExecutionProvider { const void* GetExecutionHandle() const noexcept override { return nullptr; } + + private: + std::unique_ptr global_context_; }; } // namespace onnxruntime diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc index 749907da18354..068456777bece 100644 --- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc +++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc @@ -169,7 +169,6 @@ struct OpenVINO_Provider : Provider { } void Shutdown() override { - openvino_ep::BackendManager::ReleaseGlobalContext(); } } g_provider; diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc index 31952e5b15e37..931173fd7ef47 100644 --- a/onnxruntime/core/providers/openvino/ov_interface.cc +++ b/onnxruntime/core/providers/openvino/ov_interface.cc @@ -87,7 +87,6 @@ OVExeNetwork OVCore::LoadNetwork(std::shared_ptr& ie_cnn_network, } } -#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1) || (OPENVINO_2023_2) OVExeNetwork OVCore::LoadNetwork(const std::string& model, std::string& hw_target, ov::AnyMap& device_config, @@ -103,7 +102,6 @@ OVExeNetwork OVCore::LoadNetwork(const std::string& model, ORT_THROW(log_tag + " Exception while Loading Network for graph " + name); } } -#endif void OVCore::SetCache(std::string cache_dir_path) { oe.set_property(ov::cache_dir(cache_dir_path)); diff --git a/onnxruntime/core/providers/openvino/ov_interface.h b/onnxruntime/core/providers/openvino/ov_interface.h index 690e91742beed..3db19463809cf 100644 --- a/onnxruntime/core/providers/openvino/ov_interface.h +++ b/onnxruntime/core/providers/openvino/ov_interface.h @@ -6,14 +6,10 @@ #include #include -#if defined(OPENVINO_2022_3) || (OPENVINO_2023_0) || (OPENVINO_2023_1) || (OPENVINO_2023_2) #define OV_API_20 #include "openvino/openvino.hpp" #include "openvino/pass/convert_fp32_to_fp16.hpp" #include "openvino/frontend/manager.hpp" -#else -#include -#endif #ifdef IO_BUFFER_ENABLED #include @@ -49,12 +45,10 @@ class OVCore { std::string& hw_target, ov::AnyMap& device_config, std::string name); -#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1) || (OPENVINO_2023_2) OVExeNetwork LoadNetwork(const std::string& model_stream, std::string& hw_target, ov::AnyMap& device_config, std::string name); -#endif void SetCache(std::string cache_dir_path); #ifdef IO_BUFFER_ENABLED OVExeNetwork LoadNetwork(std::shared_ptr& model, OVRemoteContextPtr context, std::string& name); diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability.cc b/onnxruntime/core/providers/openvino/ov_versions/capability.cc index 4494bb8ab2d60..11c8a1629b073 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/capability.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/capability.cc @@ -4,7 +4,7 @@ #include "core/providers/shared_library/provider_api.h" #include "../backend_utils.h" #include "../backend_manager.h" -#include "capabilities.h" +#include "capability.h" #include "utils.h" #if defined(_MSC_VER) @@ -23,19 +23,21 @@ namespace onnxruntime { namespace openvino_ep { // Constructor -GetCapability::GetCapability(const GraphViewer& graph_viewer_param, std::string device_type_param, +GetCapability::GetCapability(const GraphViewer& graph_viewer_param, + const std::string device_type_param, + const std::string device_precision, const std::string version_param) - : graph_viewer_(graph_viewer_param), device_type_(device_type_param) { - if (version_param == "V_2022_3") { - data_ops_ = new DataOps(graph_viewer_, V_2022_3, device_type_); - } else if (version_param == "V_2023_0") { - data_ops_ = new DataOps(graph_viewer_, V_2023_0, device_type_); + : graph_viewer_(graph_viewer_param), device_type_(device_type_param), device_precision_(device_precision) { + if (version_param == "V_2023_0") { + data_ops_ = new DataOps(graph_viewer_, V_2023_0, device_type_, device_precision_); } else if (version_param == "V_2023_1") { - data_ops_ = new DataOps(graph_viewer_, V_2023_1, device_type_); + data_ops_ = new DataOps(graph_viewer_, V_2023_1, device_type_, device_precision_); } else if (version_param == "V_2023_2") { - data_ops_ = new DataOps(graph_viewer_, V_2023_2, device_type_); + data_ops_ = new DataOps(graph_viewer_, V_2023_2, device_type_, device_precision_); + } else if (version_param == "V_2023_3") { + data_ops_ = new DataOps(graph_viewer_, V_2023_3, device_type_, device_precision_); } else { - data_ops_ = new DataOps(graph_viewer_, V_2023_2, device_type_); + data_ops_ = new DataOps(graph_viewer_, V_2023_3, device_type_, device_precision_); } } @@ -111,7 +113,7 @@ std::vector> GetCapability::Execute() { if (backend_utils::IsCILogEnabled()) { std::cout << "Model is fully supported on OpenVINO" << std::endl; } - openvino_ep::BackendManager::GetGlobalContext().is_wholly_supported_graph = true; + is_wholly_supported_graph_ = true; } else { // unsupported_nodes_idx.empty() #if defined(OPENVINO_DISABLE_GRAPH_PARTITION) // disables graph partition at build time diff --git a/onnxruntime/core/providers/openvino/ov_versions/capabilities.h b/onnxruntime/core/providers/openvino/ov_versions/capability.h similarity index 57% rename from onnxruntime/core/providers/openvino/ov_versions/capabilities.h rename to onnxruntime/core/providers/openvino/ov_versions/capability.h index 5bcf9d68cd94e..2040634cc45d9 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/capabilities.h +++ b/onnxruntime/core/providers/openvino/ov_versions/capability.h @@ -14,11 +14,19 @@ class GetCapability { private: const GraphViewer& graph_viewer_; std::string device_type_; + std::string device_precision_; DataOps* data_ops_; + bool is_wholly_supported_graph_ = false; public: - GetCapability(const GraphViewer& graph_viewer_param, std::string device_type_param, const std::string version_param); + GetCapability(const GraphViewer& graph_viewer_param, + const std::string device_type_param, + const std::string precision, + const std::string version_param); virtual std::vector> Execute(); + bool IsWhollySupportedGraph() { + return is_wholly_supported_graph_; + } }; } // namespace openvino_ep diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc index 8749885660314..e829bf377b195 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc @@ -12,7 +12,7 @@ #include "../backend_utils.h" #include "../backend_manager.h" #include "data_ops.h" -#include "capabilities.h" +#include "capability.h" #include "utils.h" #if defined(_MSC_VER) @@ -440,11 +440,14 @@ void DataOps::populate_op_mode_supported() { no_dimension_supported_.push_back({"Floor", V_2020_4, {"All"}}); no_dimension_supported_.push_back({"Gather", V_2020_4, {"All"}}); no_dimension_supported_.push_back({"Greater", V_2023_0, {"NPU"}}); + no_dimension_supported_.push_back({"Identity", V_2023_0, {"All"}}); no_dimension_supported_.push_back({"Less", V_2022_1, {"CPU"}}); no_dimension_supported_.push_back({"Loop", V_2021_4, {"All"}}); no_dimension_supported_.push_back({"Max", V_2023_0, {"NPU"}}); no_dimension_supported_.push_back({"Min", V_2020_4, {"All"}}); no_dimension_supported_.push_back({"Mul", V_2020_4, {"All"}}); + no_dimension_supported_.push_back({"Neg", V_2023_0, {"CPU", "GPU"}}); + no_dimension_supported_.push_back({"Pow", V_2023_0, {"CPU", "GPU"}}); no_dimension_supported_.push_back({"QuantizeLinear", V_2021_4, {"All"}}); no_dimension_supported_.push_back({"Range", V_2021_2, {"All"}}); no_dimension_supported_.push_back({"ReduceMax", V_2021_4, {"All"}}); @@ -453,6 +456,7 @@ void DataOps::populate_op_mode_supported() { no_dimension_supported_.push_back({"Reshape", V_2022_1, {"All"}}); no_dimension_supported_.push_back({"Shape", V_2022_1, {"GPU"}}); no_dimension_supported_.push_back({"Shape", V_2023_0, {"CPU"}}); + no_dimension_supported_.push_back({"Sqrt", V_2023_0, {"All"}}); no_dimension_supported_.push_back({"Squeeze", V_2020_4, {"All"}}); no_dimension_supported_.push_back({"Sub", V_2020_4, {"All"}}); no_dimension_supported_.push_back({"Unsqueeze", V_2020_4, {"All"}}); @@ -640,8 +644,7 @@ void DataOps::populate_op_mode_supported() { [this](const Node* node, const InitializedTensorSet&) { // Max op with one input is not supporting for GPU_FP16 if (device_id_.find("GPU") != std::string::npos) { - auto prec_str = openvino_ep::BackendManager::GetGlobalContext().precision_str; - if (prec_str == "FP16") { + if (device_precision_ == "FP16") { if (node->InputDefs().size() == 1) { return true; } @@ -656,8 +659,7 @@ void DataOps::populate_op_mode_supported() { [this](const Node* node, const InitializedTensorSet&) { // Min op with one input is not supporting for GPU_FP16 if (device_id_.find("GPU") != std::string::npos) { - auto prec_str = openvino_ep::BackendManager::GetGlobalContext().precision_str; - if (prec_str == "FP16") { + if (device_precision_ == "FP16") { if (node->InputDefs().size() == 1) { return true; } @@ -672,8 +674,7 @@ void DataOps::populate_op_mode_supported() { [this](const Node* node, const InitializedTensorSet&) { // Sum op with one input is not supporting for GPU_FP16 if (device_id_.find("GPU") != std::string::npos) { - auto prec_str = openvino_ep::BackendManager::GetGlobalContext().precision_str; - if (prec_str == "FP16") { + if (device_precision_ == "FP16") { if (node->InputDefs().size() == 1) { return true; } @@ -705,7 +706,7 @@ void DataOps::populate_op_mode_supported() { op_list_.insert({"PRelu", obj}); } { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0, V_2023_1, V_2023_2}, + UnsupportedOpMode obj = {{V_2023_0, V_2023_1, V_2023_2, V_2023_3}, [this](const Node* node, const InitializedTensorSet&) { const auto& input_arg = node->InputDefs()[1]; auto shape = input_arg->Shape(); @@ -820,7 +821,7 @@ void DataOps::populate_op_mode_supported() { op_list_.insert({"Squeeze", obj}); } { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0, V_2023_1, V_2023_2}, + UnsupportedOpMode obj = {{V_2023_0, V_2023_1, V_2023_2, V_2023_3}, [this](const Node* node, const InitializedTensorSet&) { // If the operator is unsqueeze // If axes is an input, then we cannot produce a static graph. @@ -835,7 +836,7 @@ void DataOps::populate_op_mode_supported() { op_list_.insert({"Unsqueeze", obj}); } { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0, V_2023_1, V_2023_2}, + UnsupportedOpMode obj = {{V_2023_0, V_2023_1, V_2023_2, V_2023_3}, [this](const Node* node, const InitializedTensorSet&) { // check for attributes auto& upsample_attr = node->GetAttributes(); @@ -1131,9 +1132,6 @@ bool DataOps::node_is_supported(const std::map op_list_; std::vector subgraph_supported_; std::vector no_dimension_supported_; @@ -70,8 +72,8 @@ class DataOps { const NodeIndex node_idx); public: - DataOps(const GraphViewer& graph_viewer_param, VersionNum ver, std::string dev_id) - : graph_viewer_(graph_viewer_param), version_id_(ver), device_id_(dev_id) { + DataOps(const GraphViewer& graph_viewer_param, VersionNum ver, const std::string dev_id, const std::string device_precision) + : graph_viewer_(graph_viewer_param), version_id_(ver), device_id_(dev_id), device_precision_(device_precision) { populate_op_mode_supported(); populate_types_supported(); } diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index 08bfb618f55b4..d77c188f832a7 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -2723,6 +2723,7 @@ static constexpr OrtApi ort_api_1_to_17 = { &OrtApis::ReadOpAttr, &OrtApis::SetDeterministicCompute, &OrtApis::KernelContext_ParallelFor, + &OrtApis::SessionOptionsAppendExecutionProvider_OpenVINO_V2, }; // OrtApiBase can never change as there is no way to know what version of OrtApiBase is returned by OrtGetApiBase. diff --git a/onnxruntime/core/session/ort_apis.h b/onnxruntime/core/session/ort_apis.h index 6df5e4145b416..c1caafa4dcad3 100644 --- a/onnxruntime/core/session/ort_apis.h +++ b/onnxruntime/core/session/ort_apis.h @@ -504,4 +504,9 @@ ORT_API_STATUS_IMPL(SetDeterministicCompute, _Inout_ OrtSessionOptions* options, ORT_API_STATUS_IMPL(KernelContext_ParallelFor, _In_ const OrtKernelContext* context, _In_ void (*fn)(void*, size_t), _In_ size_t total, _In_ size_t num_batch, _In_ void* user_data); +ORT_API_STATUS_IMPL(SessionOptionsAppendExecutionProvider_OpenVINO_V2, + _In_ OrtSessionOptions* options, + _In_reads_(num_keys) const char* const* provider_options_keys, + _In_reads_(num_keys) const char* const* provider_options_values, + _In_ size_t num_keys); } // namespace OrtApis diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index e2d46012c097b..2df30ba2de391 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1440,23 +1440,27 @@ ProviderOptions OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(const O if (legacy_ov_options->device_id != nullptr) ov_options_converted_map["device_id"] = legacy_ov_options->device_id; - ov_options_converted_map["num_of_threads"] = std::to_string(legacy_ov_options->num_of_threads); + if (legacy_ov_options->num_of_threads != '\0') + ov_options_converted_map["num_of_threads"] = std::to_string(legacy_ov_options->num_of_threads); if (legacy_ov_options->cache_dir != nullptr) ov_options_converted_map["cache_dir"] = legacy_ov_options->cache_dir; - std::stringstream context_string; - - if (legacy_ov_options->context != nullptr) + if (legacy_ov_options->context != nullptr) { + std::stringstream context_string; context_string << legacy_ov_options->context; - ov_options_converted_map["context"] = context_string.str(); + ov_options_converted_map["context"] = context_string.str(); + } ov_options_converted_map["enable_opencl_throttling"] = legacy_ov_options->enable_opencl_throttling; - std::string enable_dynamic_shapes = reinterpret_cast(legacy_ov_options->enable_dynamic_shapes); - if (enable_dynamic_shapes == "true" || enable_dynamic_shapes == "True") { - ov_options_converted_map["disable_dynamic_shapes"] = "false"; - } else if (enable_dynamic_shapes == "false" || enable_dynamic_shapes == "False") { - ov_options_converted_map["disable_dynamic_shapes"] = "true"; + + if (legacy_ov_options->enable_dynamic_shapes != '\0') { + std::string enable_dynamic_shapes = reinterpret_cast(legacy_ov_options->enable_dynamic_shapes); + if (enable_dynamic_shapes == "true" || enable_dynamic_shapes == "True") { + ov_options_converted_map["disable_dynamic_shapes"] = "false"; + } else if (enable_dynamic_shapes == "false" || enable_dynamic_shapes == "False") { + ov_options_converted_map["disable_dynamic_shapes"] = "true"; + } } // Add new provider option below ov_options_converted_map["num_streams"] = "1"; @@ -1733,6 +1737,39 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_OpenVINO, _In API_IMPL_END } +ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_OpenVINO_V2, + _In_ OrtSessionOptions* options, + _In_reads_(num_keys) const char* const* provider_options_keys, + _In_reads_(num_keys) const char* const* provider_options_values, + _In_ size_t num_keys) { + API_IMPL_BEGIN + onnxruntime::ProviderOptions provider_options; + for (size_t i = 0; i != num_keys; ++i) { + if (provider_options_keys[i] == nullptr || provider_options_keys[i][0] == '\0' || + provider_options_values[i] == nullptr || provider_options_values[i][0] == '\0') { + return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "Provider options key/value cannot be empty"); + } + + // arbitrary length to validate the key/value. adjust if/when needed. + // TODO: are any other input validation checks required here (and in the other functions that process + // provider options)? + if (strlen(provider_options_keys[i]) > 1024 || strlen(provider_options_values[i]) > 1024) { + return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, + "Maximum string length for a provider options key/value is 1024."); + } + + provider_options[provider_options_keys[i]] = provider_options_values[i]; + } + auto factory = onnxruntime::OpenVINOProviderFactoryCreator::Create(&provider_options); + if (!factory) { + return OrtApis::CreateStatus(ORT_FAIL, "SessionOptionsAppendExecutionProvider_OpenVINO_V2: Failed to load shared library"); + } + + options->provider_factories.push_back(factory); + return nullptr; + API_IMPL_END +} + ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_OpenVINO, _In_ OrtSessionOptions* options, _In_ const char* device_type) { OrtOpenVINOProviderOptions provider_options{}; diff --git a/onnxruntime/core/session/provider_registration.cc b/onnxruntime/core/session/provider_registration.cc index b012406bd026a..86b3d01c640a3 100644 --- a/onnxruntime/core/session/provider_registration.cc +++ b/onnxruntime/core/session/provider_registration.cc @@ -311,6 +311,18 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_OpenVINO, return CreateNotEnabledStatus("OpenVINO"); } +ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_OpenVINO_V2, + _In_ OrtSessionOptions* options, + _In_reads_(num_keys) const char* const* provider_options_keys, + _In_reads_(num_keys) const char* const* provider_options_values, + _In_ size_t num_keys) { + ORT_UNUSED_PARAMETER(options); + ORT_UNUSED_PARAMETER(provider_options_keys); + ORT_UNUSED_PARAMETER(provider_options_values); + ORT_UNUSED_PARAMETER(num_keys); + return CreateNotEnabledStatus("OpenVINO"); +} + ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT, _In_ OrtSessionOptions* options, _In_ const OrtTensorRTProviderOptions* tensorrt_options) { ORT_UNUSED_PARAMETER(options); diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 13082fe69cf48..27385d44e257a 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -307,7 +307,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_npu_fast_compile', 'num_of_threads', 'cache_dir', 'num_streams', 'enable_opencl_throttling', 'disable_dynamic_shapes'] \n"); } } - session_options.AppendExecutionProvider("OpenVINO", ov_options); + session_options.AppendExecutionProvider_OpenVINO_V2(ov_options); #else ORT_THROW("OpenVINO is not supported in this build\n"); #endif