From 4d1963c2a2275a775eb52d5a578faa750d71b8c8 Mon Sep 17 00:00:00 2001 From: sfatimar Date: Fri, 19 Apr 2024 13:01:38 +0530 Subject: [PATCH] OpenVINO EP Rel 1.18 Changes (#20337) ### Description These changes include Support to OpenVINO 2024.1 Import PreCompiled Blobs with EPContext Blob Separate Device/Precision as input Deprecate CPU_FP32 , GPU_FP32 terminology , introduce CPU, GPU AUTO GPU, CPU will only create GPU Blob and not CPU Blob. ### Motivation and Context - OpenVINO 2024.1 will be out soon - Import Precompiled Blob can greatly reduce FEIL/FIL Time. - Separating Device/Precision will make the input cleaner - --------- Co-authored-by: Suryaprakash Shanmugam Co-authored-by: Preetha Veeramalai --- cmake/CMakeLists.txt | 34 +- dockerfiles/Dockerfile.openvino | 2 +- .../providers/openvino/backend_manager.cc | 77 +++- .../core/providers/openvino/backend_manager.h | 14 +- .../core/providers/openvino/backend_utils.cc | 4 +- .../core/providers/openvino/backend_utils.h | 4 +- .../openvino/backends/backend_factory.cc | 7 +- .../openvino/backends/basic_backend.cc | 103 ++++-- .../openvino/backends/basic_backend.h | 10 +- .../core/providers/openvino/contexts.h | 8 +- .../core/providers/openvino/ibackend.h | 5 +- .../openvino/onnx_ctx_model_helper.cc | 123 ++++++ .../openvino/onnx_ctx_model_helper.h | 45 +++ .../openvino/openvino_execution_provider.cc | 96 +++-- .../openvino/openvino_execution_provider.h | 92 ++--- .../openvino/openvino_provider_factory.cc | 156 ++++++-- .../core/providers/openvino/ov_interface.cc | 134 +++++-- .../core/providers/openvino/ov_interface.h | 34 +- .../openvino/ov_versions/capability.cc | 29 +- .../openvino/ov_versions/capability.h | 6 +- .../openvino/ov_versions/data_ops.cc | 349 +----------------- .../providers/openvino/ov_versions/data_ops.h | 12 +- .../core/session/provider_bridge_ort.cc | 6 +- .../python/onnxruntime_pybind_state.cc | 10 +- .../python/onnxruntime_pybind_state_common.h | 14 +- .../test/perftest/command_args_parser.cc | 2 +- onnxruntime/test/perftest/ort_test_session.cc | 59 ++- .../cpu/activation/activation_op_test.h | 2 +- .../test/providers/cpu/math/clip_test.cc | 2 +- .../cpu/math/element_wise_ops_test.cc | 26 +- .../test/providers/cpu/math/gemm_test.cc | 14 +- .../providers/cpu/nn/batch_norm_op_test.cc | 2 +- .../test/providers/cpu/nn/pool_op_test.cc | 2 +- .../cpu/reduction/reduction_ops_test.cc | 6 +- .../providers/cpu/tensor/scatter_op_test.cc | 2 +- .../providers/cpu/tensor/where_op_test.cc | 4 +- .../test/python/onnx_backend_test_series.py | 6 +- .../onnx_backend_test_series_filters.jsonc | 2 +- tools/ci_build/build.py | 28 +- .../linux-openvino-ci-pipeline.yml | 2 +- .../py-package-build-pipeline.yml | 2 +- .../py-packaging-selectable-stage.yml | 2 +- 42 files changed, 827 insertions(+), 710 deletions(-) create mode 100644 onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc create mode 100644 onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index a5cadc937e63d..1795052953d8c 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -1325,43 +1325,25 @@ if (onnxruntime_USE_OPENVINO) add_definitions(-DUSE_OPENVINO=1) - if (onnxruntime_USE_OPENVINO_GPU_FP32) - add_definitions(-DOPENVINO_CONFIG_GPU_FP32=1) + if (onnxruntime_USE_OPENVINO_GPU) + add_definitions(-DOPENVINO_CONFIG_GPU=1) endif() - if (onnxruntime_USE_OPENVINO_GPU_FP16) - add_definitions(-DOPENVINO_CONFIG_GPU_FP16=1) - endif() - - if (onnxruntime_USE_OPENVINO_CPU_FP32) - 
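At the session level, the deprecated combined strings (CPU_FP32, GPU_FP16, ...) are replaced by separate `device_type` and `precision` provider options, mirroring the CMake flag rename above. A minimal usage sketch, assuming an OpenVINO-enabled onnxruntime build; the model path is illustrative:

```python
import onnxruntime as ort

# Device and precision are now separate provider options; the deprecated
# combined strings ("CPU_FP32", "GPU_FP16", ...) are still parsed but warn.
ov_options = {"device_type": "GPU", "precision": "FP16"}

sess = ort.InferenceSession(
    "model.onnx",  # illustrative path
    providers=["OpenVINOExecutionProvider"],
    provider_options=[ov_options],
)
print(sess.get_providers())
```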
add_definitions(-DOPENVINO_CONFIG_CPU_FP32=1) - endif() - - if (onnxruntime_USE_OPENVINO_CPU_FP16) - add_definitions(-DOPENVINO_CONFIG_CPU_FP16=1) + if (onnxruntime_USE_OPENVINO_CPU) + add_definitions(-DOPENVINO_CONFIG_CPU=1) endif() if (onnxruntime_USE_OPENVINO_NPU) add_definitions(-DOPENVINO_CONFIG_NPU=1) endif() - if (onnxruntime_USE_OPENVINO_GPU_FP32_NP) - add_definitions(-DOPENVINO_CONFIG_GPU_FP32=1) - add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1) - endif() - - if (onnxruntime_USE_OPENVINO_GPU_FP16_NP) - add_definitions(-DOPENVINO_CONFIG_GPU_FP16=1) - add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1) - endif() - - if (onnxruntime_USE_OPENVINO_CPU_FP32_NP) - add_definitions(-DOPENVINO_CONFIG_CPU_FP32=1) + if (onnxruntime_USE_OPENVINO_GPU_NP) + add_definitions(-DOPENVINO_CONFIG_GPU=1) add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1) endif() - if (onnxruntime_USE_OPENVINO_CPU_FP16_NP) - add_definitions(-DOPENVINO_CONFIG_CPU_FP16=1) + if (onnxruntime_USE_OPENVINO_CPU_NP) + add_definitions(-DOPENVINO_CONFIG_CPU=1) add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1) endif() diff --git a/dockerfiles/Dockerfile.openvino b/dockerfiles/Dockerfile.openvino index 049916fac92f1..75898770acf28 100644 --- a/dockerfiles/Dockerfile.openvino +++ b/dockerfiles/Dockerfile.openvino @@ -13,7 +13,7 @@ ENV WORKDIR_PATH=/home/openvino WORKDIR $WORKDIR_PATH ENV DEBIAN_FRONTEND noninteractive -ARG DEVICE=CPU_FP32 +ARG DEVICE=CPU ARG ONNXRUNTIME_REPO=https://github.com/microsoft/onnxruntime.git ARG ONNXRUNTIME_BRANCH=main diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 3252603e33389..db0a33c557353 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -2,14 +2,14 @@ // Licensed under the MIT License #include +#include #include -#include #include "core/providers/shared_library/provider_api.h" -#include "contexts.h" -#include "backend_manager.h" -#include "ibackend.h" -#include "backend_utils.h" +#include "core/providers/openvino/contexts.h" +#include "core/providers/openvino/backend_manager.h" +#include "core/providers/openvino/ibackend.h" +#include "core/providers/openvino/backend_utils.h" namespace onnxruntime { namespace openvino_ep { @@ -21,8 +21,17 @@ GlobalContext& BackendManager::GetGlobalContext() { BackendManager::BackendManager(const GlobalContext& global_context, const onnxruntime::Node& fused_node, const onnxruntime::GraphViewer& subgraph, - const logging::Logger& logger) { + const logging::Logger& logger, + EPCtxHandler& ctx_handle) { global_context_ = global_context; + ep_ctx_handle_ = ctx_handle; + + openvino_sdk_version_ = std::to_string(global_context_.OpenVINO_Version.at(0)) + "." 
+ + std::to_string(global_context_.OpenVINO_Version.at(1)); + if (ep_ctx_handle_.CheckForOVEPCtxNode(subgraph, openvino_sdk_version_)) { + if (ep_ctx_handle_.ImportBlobFromEPCtxModel(subgraph) != Status::OK()) + ORT_THROW("Import blob from model failed"); + } auto prec_str = GetGlobalContext().precision_str; @@ -66,7 +75,8 @@ BackendManager::BackendManager(const GlobalContext& global_context, try { concrete_backend_ = BackendFactory::MakeBackend(*model_proto_, GetGlobalContext(), - subgraph_context_); + subgraph_context_, + ep_ctx_handle_); } catch (std::string const& msg) { ORT_THROW(msg); } @@ -85,7 +95,8 @@ BackendManager::BackendManager(const GlobalContext& global_context, try { concrete_backend_ = BackendFactory::MakeBackend(*model_proto_, GetGlobalContext(), - subgraph_context_); + subgraph_context_, + ep_ctx_handle_); } catch (const OnnxRuntimeException& ex) { if (device_type.find("NPU") != std::string::npos) { LOGS_DEFAULT(WARNING) << ex.what(); @@ -96,7 +107,8 @@ BackendManager::BackendManager(const GlobalContext& global_context, try { concrete_backend_ = BackendFactory::MakeBackend(*model_proto_, GetGlobalContext(), - subgraph_context_); + subgraph_context_, + ep_ctx_handle_); } catch (std::string const& msg) { ORT_THROW(msg); } @@ -107,6 +119,45 @@ BackendManager::BackendManager(const GlobalContext& global_context, } } +// Call EPContext model exporter here if the provider option for exporting +// precompiled blob is set. If that's the case: +// By default, create model in embed mode where the blob stream is exported as data within +// the EPContext node. +Status BackendManager::ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphViewer& graph_body_viewer, + const logging::Logger& logger) { + std::string model_blob_str; + auto compiled_model = concrete_backend_->GetOVCompiledModel(); + auto graph_name = global_context_.onnx_model_path_name; + // Remove extension so we can append suffix to form the complete name of output graph + graph_name = [&]() { + size_t dot = graph_name.find_last_of("."); + if (dot == std::string::npos) return graph_name; + return graph_name.substr(0, dot); + }(); + // If embed_mode, then pass on the serialized blob + // If not embed_mode, dump the blob here and only pass on the path to the blob + if (global_context_.ep_context_embed_mode) { + std::ostringstream model_blob_stream; + compiled_model.export_model(model_blob_stream); + model_blob_str = model_blob_stream.str(); + ORT_ENFORCE(model_blob_str.size() != 0); + } else { + std::ofstream f(graph_name + ".blob", std::ios::out | std::ios::trunc | std::ios::binary); + compiled_model.export_model(f); + model_blob_str = graph_name + ".blob"; + } + + ORT_RETURN_IF_ERROR(ep_ctx_handle_.ExportEPCtxModel(graph_body_viewer, + graph_name, + logger, + global_context_.ep_context_embed_mode, + model_blob_str, + openvino_sdk_version_, + GetGlobalContext().device_type)); + + return Status::OK(); +} + bool BackendManager::ModelHasBatchedInputs(const ONNX_NAMESPACE::ModelProto& model_proto) const { bool has_batched_inputs = true; @@ -182,7 +233,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, return model_proto; } -std::vector> GetInputTensorShapes(Ort::KernelContext& context) { +std::vector> GetInputTensorShapes(const Ort::KernelContext& context) { const auto input_count = context.GetInputCount(); std::vector> input_shapes; input_shapes.reserve(input_count); @@ -289,7 +340,8 @@ void BackendManager::Compute(OrtKernelContext* context) { try { dynamic_backend = 
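The exporter above is driven by the new `export_ep_ctx_blob` provider option: in embed mode (the default) the compiled blob is serialized into the EPContext node, otherwise a `<graph>.blob` file is dumped and only its path is recorded. A hedged sketch of triggering the export from the API, with illustrative file names:

```python
import onnxruntime as ort

# export_ep_ctx_blob asks the EP to wrap the compiled blob in an EPContext
# model during session creation; output follows the
# "<model>-ov_<device>_blob.onnx" pattern used by ExportEPCtxModel.
ov_options = {"device_type": "NPU", "export_ep_ctx_blob": "True"}
ort.InferenceSession("model.onnx",
                     providers=[("OpenVINOExecutionProvider", ov_options)])

# A later run can point at "model-ov_NPU_blob.onnx" directly and skip the
# front-end + compile step (the FEIL/FIL time saving noted in the description).
```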
BackendFactory::MakeBackend(*modelproto_with_concrete_shapes, GetGlobalContext(), - subgraph_context_); + subgraph_context_, + ep_ctx_handle_); } catch (const OnnxRuntimeException& ex) { if (GetGlobalContext().device_type.find("NPU") != std::string::npos) { LOGS_DEFAULT(WARNING) << ex.what(); @@ -301,7 +353,8 @@ void BackendManager::Compute(OrtKernelContext* context) { try { dynamic_backend = BackendFactory::MakeBackend(*modelproto_with_concrete_shapes, GetGlobalContext(), - subgraph_context_); + subgraph_context_, + ep_ctx_handle_); } catch (std::string const& msg) { ORT_THROW(msg); } diff --git a/onnxruntime/core/providers/openvino/backend_manager.h b/onnxruntime/core/providers/openvino/backend_manager.h index 376ebea225a2b..805fd16b09fde 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.h +++ b/onnxruntime/core/providers/openvino/backend_manager.h @@ -8,9 +8,10 @@ #include #include -#include "ov_interface.h" -#include "contexts.h" -#include "ibackend.h" +#include "core/providers/openvino/ov_interface.h" +#include "core/providers/openvino/contexts.h" +#include "core/providers/openvino/onnx_ctx_model_helper.h" +#include "core/providers/openvino/ibackend.h" namespace onnxruntime { namespace openvino_ep { @@ -21,11 +22,14 @@ class BackendManager { BackendManager(const GlobalContext& global_context, const onnxruntime::Node& fused_node, const onnxruntime::GraphViewer& subgraph, - const logging::Logger& logger); + const logging::Logger& logger, + EPCtxHandler& ctx_handle); void Compute(OrtKernelContext* context); void ShutdownBackendManager(); void SetGlobalCotext(const GlobalContext& global_context); GlobalContext& GetGlobalContext(); + Status ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphViewer& subgraph, + const logging::Logger& logger); private: std::unique_ptr GetModelProtoFromFusedNode( @@ -47,6 +51,8 @@ class BackendManager { std::map> backend_map_; SubGraphContext subgraph_context_; GlobalContext global_context_; + EPCtxHandler ep_ctx_handle_{}; + std::string openvino_sdk_version_{}; }; } // namespace openvino_ep diff --git a/onnxruntime/core/providers/openvino/backend_utils.cc b/onnxruntime/core/providers/openvino/backend_utils.cc index 32b5ad7d5b66d..c64f3041a5069 100644 --- a/onnxruntime/core/providers/openvino/backend_utils.cc +++ b/onnxruntime/core/providers/openvino/backend_utils.cc @@ -5,11 +5,11 @@ #include #include -#include "ov_interface.h" #include "openvino/pass/convert_fp32_to_fp16.hpp" #include "openvino/pass/constant_folding.hpp" #include "core/providers/shared_library/provider_api.h" -#include "backend_utils.h" +#include "core/providers/openvino/backend_utils.h" +#include "core/providers/openvino/ov_interface.h" using Exception = ov::Exception; diff --git a/onnxruntime/core/providers/openvino/backend_utils.h b/onnxruntime/core/providers/openvino/backend_utils.h index 93fa874774469..13ecb153b98f2 100644 --- a/onnxruntime/core/providers/openvino/backend_utils.h +++ b/onnxruntime/core/providers/openvino/backend_utils.h @@ -12,8 +12,8 @@ #include #include "core/session/onnxruntime_cxx_api.h" -#include "contexts.h" -#include "ov_interface.h" +#include "core/providers/openvino/contexts.h" +#include "core/providers/openvino/ov_interface.h" #ifdef _WIN32 #include #define GetCurrentDir _getcwd diff --git a/onnxruntime/core/providers/openvino/backends/backend_factory.cc b/onnxruntime/core/providers/openvino/backends/backend_factory.cc index a0f4ce8f843b0..ce7e1c9f7c2b4 100644 --- a/onnxruntime/core/providers/openvino/backends/backend_factory.cc +++ 
b/onnxruntime/core/providers/openvino/backends/backend_factory.cc @@ -5,7 +5,7 @@ #include "core/providers/shared_library/provider_api.h" #include "core/providers/openvino/contexts.h" #include "core/providers/openvino/ibackend.h" -#include "basic_backend.h" +#include "core/providers/openvino/backends/basic_backend.h" namespace onnxruntime { namespace openvino_ep { @@ -13,7 +13,8 @@ namespace openvino_ep { std::shared_ptr BackendFactory::MakeBackend(const ONNX_NAMESPACE::ModelProto& model_proto, GlobalContext& global_context, - const SubGraphContext& subgraph_context) { + const SubGraphContext& subgraph_context, + EPCtxHandler& ep_ctx_handle) { std::string type = global_context.device_type; if (type == "CPU" || type.find("GPU") != std::string::npos || type.find("NPU") != std::string::npos || @@ -22,7 +23,7 @@ BackendFactory::MakeBackend(const ONNX_NAMESPACE::ModelProto& model_proto, type.find("AUTO") != std::string::npos) { std::shared_ptr concrete_backend_; try { - concrete_backend_ = std::make_shared(model_proto, global_context, subgraph_context); + concrete_backend_ = std::make_shared(model_proto, global_context, subgraph_context, ep_ctx_handle); } catch (std::string const& msg) { ORT_THROW(msg); } diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc index 69d234a7c55ef..efaf0ca808a86 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc @@ -9,9 +9,10 @@ #include #include "core/providers/shared_library/provider_api.h" -#include "../backend_utils.h" -#include "basic_backend.h" -#include "../backend_manager.h" +#include "core/providers/openvino/backend_utils.h" +#include "core/providers/openvino/backends/basic_backend.h" +#include "core/providers/openvino/onnx_ctx_model_helper.h" +#include "core/providers/openvino/backend_manager.h" namespace onnxruntime { @@ -21,9 +22,13 @@ using namespace backend_utils; BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto, GlobalContext& global_context, - const SubGraphContext& subgraph_context) + const SubGraphContext& subgraph_context, + EPCtxHandler& ep_ctx_handle) : global_context_(global_context), subgraph_context_(subgraph_context) { - std::string& hw_target = (global_context_.device_id != "") ? 
global_context_.device_id : global_context_.device_type; + std::string& hw_target = global_context_.device_type; + + is_ep_ctx_graph_ = ep_ctx_handle.IsValidOVEPCtxGraph(); + if (ValidateSubgraph(const_outputs_map_)) return; @@ -50,47 +55,62 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto, model_proto.SerializeToOstream(outfile); } #endif + try { std::string dev_prec = global_context.device_type + "_" + global_context_.precision_str; - if (global_context.is_wholly_supported_graph) { + + if (global_context.is_wholly_supported_graph) { // Full graph is supported #if defined(IO_BUFFER_ENABLED) - if ((global_context.device_type.find("GPU") != std::string::npos) && - (global_context_.context != nullptr)) { + if (is_ep_ctx_graph_) { + std::istringstream model_stream(ep_ctx_handle.GetModelBlobString()); + exe_network_ = global_context_.ie_core.ImportModel(model_stream, + remote_context_, + subgraph_context_.subgraph_name); + ie_cnn_network_ = exe_network_.Get().get_runtime_model(); + } else if ((global_context.device_type.find("GPU") != std::string::npos) && + (global_context_.context != nullptr)) { LOGS_DEFAULT(INFO) << log_tag << "IO Buffering Enabled"; cl_context ctx = static_cast(global_context_.context); remote_context_ = new ov::intel_gpu::ocl::ClContext(global_context_.ie_core.Get(), ctx); ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_); - exe_network_ = global_context_.ie_core.LoadNetwork( + exe_network_ = global_context_.ie_core.CompileModel( ie_cnn_network_, remote_context_, subgraph_context_.subgraph_name); - LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin"; + ie_cnn_network_ = exe_network_.Get().get_runtime_model(); } else { ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_); - exe_network_ = global_context_.ie_core.LoadNetwork( + exe_network_ = global_context_.ie_core.CompileModel( ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name); - LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin"; } -#else - if (!subgraph_context_.has_dynamic_input_shape && - global_context_.onnx_model_path_name != "" && - dev_prec != "CPU_FP16") { - exe_network_ = global_context_.ie_core.LoadNetwork(global_context_.onnx_model_path_name, +#else // !IO_BUFFER_ENABLED + if (is_ep_ctx_graph_) { + // If the blob is held in an EPContext node, then skip FE+Compile + // and directly move on to creating a backend with the executable blob + exe_network_ = global_context_.ie_core.ImportModel(ep_ctx_handle.GetModelBlobStream(), hw_target, device_config, subgraph_context_.subgraph_name); - LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin"; - } else { + ie_cnn_network_ = exe_network_.Get().get_runtime_model(); + } else if (!subgraph_context_.has_dynamic_input_shape && + global_context_.onnx_model_path_name.find(".onnx") != std::string ::npos) { + // Inputs with static dimenstions + exe_network_ = global_context_.ie_core.CompileModel(global_context_.onnx_model_path_name, + hw_target, + global_context_.cache_dir, + device_config, + subgraph_context_.subgraph_name); + ie_cnn_network_ = exe_network_.Get().get_runtime_model(); + } else { // Inputs with dynamic dimensions ie_cnn_network_ = CreateOVModel(model_proto, global_context_, const_outputs_map_); - exe_network_ = global_context_.ie_core.LoadNetwork( + exe_network_ = global_context_.ie_core.CompileModel( ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name); - 
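In short, the constructor above now chooses between three paths: importing a pre-compiled blob from an EPContext node, compiling a static-shaped model straight from its .onnx path, or building an ov::Model first. A small Python sketch of that decision, for reference only (not EP source):

```python
def pick_backend_path(is_ep_ctx_graph: bool, has_dynamic_input_shape: bool,
                      onnx_model_path: str) -> str:
    """Sketch of the branch selection above; not the EP source itself."""
    if is_ep_ctx_graph:
        return "import_model"          # pre-compiled blob, FE + compile skipped
    if not has_dynamic_input_shape and ".onnx" in onnx_model_path:
        return "compile_from_path"     # static shapes: pass the .onnx path to OpenVINO
    return "compile_from_ov_model"     # dynamic shapes: CreateOVModel + CompileModel
```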
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin"; } #endif - } else { + } else { // Full graph is not supported ie_cnn_network_ = CreateOVModel(model_proto, global_context_, const_outputs_map_); - exe_network_ = global_context_.ie_core.LoadNetwork( + exe_network_ = global_context_.ie_core.CompileModel( ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name); - LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin"; } + LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin"; } catch (const char* msg) { ORT_THROW(msg); } @@ -111,17 +131,34 @@ bool BasicBackend::ValidateSubgraph(std::map= 2024 && global_context_.OpenVINO_Version.at(1) >= 1) { + device_config.emplace(ov::hint::inference_precision(ov::element::undefined)); + device_config.emplace(ov::hint::execution_mode(ov::hint::ExecutionMode::ACCURACY)); + } else { + if (global_context_.model_precision != "") + device_config.emplace(ov::hint::inference_precision(global_context_.model_precision)); + } + } #ifndef NDEBUG if (openvino_ep::backend_utils::IsDebugEnabled()) { device_config.emplace(ov::enable_profiling(true)); } #endif + + // Set a priority level for the current workload for preemption; default priority is "DEFAULT" + // CPU Plugin doesn't support workload priority + if (global_context_.device_type.find("CPU") == std::string::npos) + device_config.emplace(ov::hint::model_priority(global_context_.model_priority)); + if (global_context_.device_type.find("NPU") != std::string::npos) { std::pair device_property; device_property = std::make_pair("NPU_COMPILER_TYPE", "DRIVER"); @@ -135,9 +172,12 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) { } void BasicBackend::EnableCaching() { + // cache_dir argument has no effect when working with an embed-mode EPContext Graph + if (is_ep_ctx_graph_) return; + if (!global_context_.cache_dir.empty()) { LOGS_DEFAULT(INFO) << log_tag << "Enables Caching"; - global_context_.ie_core.SetCache(global_context_.cache_dir); + global_context_.ie_core.SetCache(global_context_.cache_dir, global_context_.device_type); } } @@ -152,13 +192,19 @@ void BasicBackend::EnableGPUThrottling(ov::AnyMap& device_config) { } void BasicBackend::EnableStreams() { + // Return silently for NPU as it's currently treated as a read-only flag by the NPU plugin + // and throws an exception for the same + if (global_context_.device_type.find("NPU") != std::string::npos) + return; + // Streams can be set only if the device is not one of AUTO, MULTI, or HETERO // Throw an exception if the user tries to set num_streams for these devices if ((global_context_.device_type.find("MULTI") != std::string::npos) || (global_context_.device_type.find("HETERO") != std::string::npos) || (global_context_.device_type.find("AUTO") != std::string::npos)) { if (global_context_.num_streams != 1) { - ORT_THROW(log_tag + "Cannot set NUM_STREAMS to " + std::to_string(global_context_.num_streams) + " for device " + global_context_.device_type); + ORT_THROW(log_tag + "Cannot set NUM_STREAMS to " + + std::to_string(global_context_.num_streams) + " for device " + global_context_.device_type); } // Do nothing } else { @@ -493,8 +539,7 @@ void BasicBackend::Infer(OrtKernelContext* ctx) { #ifndef IO_BUFFER_ENABLED // Printing performance counts is disabled when IO_BUFFER_ENABLED if (openvino_ep::backend_utils::IsDebugEnabled()) { inferRequestsQueue_->printstatus(); // Printing the elements of infer_requests_ vector pool only in debug mode - std::string& hw_target = - (global_context_.device_id != 
"") ? global_context_.device_id : global_context_.device_type; + std::string& hw_target = global_context_.device_type; printPerformanceCounts(infer_request, std::cout, hw_target); } #endif diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.h b/onnxruntime/core/providers/openvino/backends/basic_backend.h index 3502f660bbb20..5565223f067b8 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.h +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.h @@ -25,12 +25,15 @@ class BasicBackend : public IBackend { public: BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto, GlobalContext& global_context, - const SubGraphContext& subgraph_context); + const SubGraphContext& subgraph_context, + EPCtxHandler& ep_ctx_handle); void Infer(OrtKernelContext* context) override; + ov::CompiledModel& GetOVCompiledModel() override { + return exe_network_.Get(); + } private: - bool ImportBlob(std::string hw_target, bool npu_status); void PopulateCompiledDirectory(std::string, std::string&, std::string&, bool&); bool ValidateSubgraph(std::map>& const_outputs_map); void PopulateConfigValue(ov::AnyMap& device_config); @@ -49,10 +52,11 @@ class BasicBackend : public IBackend { GlobalContext& global_context_; SubGraphContext subgraph_context_; mutable std::mutex compute_lock_; - std::shared_ptr ie_cnn_network_; + std::shared_ptr ie_cnn_network_; OVExeNetwork exe_network_; std::map> const_outputs_map_; std::unique_ptr inferRequestsQueue_; + bool is_ep_ctx_graph_{false}; #if defined IO_BUFFER_ENABLED OVRemoteContextPtr remote_context_; #endif diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h index 8701d9f676ffd..6d0a558eeae45 100644 --- a/onnxruntime/core/providers/openvino/contexts.h +++ b/onnxruntime/core/providers/openvino/contexts.h @@ -6,7 +6,7 @@ #include #include #include -#include "ov_interface.h" +#include "core/providers/openvino/ov_interface.h" namespace onnxruntime { namespace openvino_ep { @@ -18,14 +18,16 @@ struct GlobalContext { bool enable_npu_fast_compile = false; bool enable_opencl_throttling = false; bool disable_dynamic_shapes = false; + bool ep_context_embed_mode = true; + bool export_ep_ctx_blob = false; size_t num_of_threads; std::string device_type; std::string precision_str; - std::string device_id; + std::string model_precision; std::string cache_dir; + std::string model_priority = "DEFAULT"; int num_streams; std::vector deviceAvailableList = {true, true, true, true, true, true, true, true}; - std::vector deviceTags = {"0", "1", "2", "3", "4", "5", "6", "7"}; std::string onnx_model_name; std::string onnx_model_path_name; int onnx_opset_version; diff --git a/onnxruntime/core/providers/openvino/ibackend.h b/onnxruntime/core/providers/openvino/ibackend.h index ece855c6167c6..eb0d8e8823896 100644 --- a/onnxruntime/core/providers/openvino/ibackend.h +++ b/onnxruntime/core/providers/openvino/ibackend.h @@ -6,6 +6,7 @@ #include #define ORT_API_MANUAL_INIT #include "core/session/onnxruntime_cxx_api.h" +#include "core/providers/openvino/onnx_ctx_model_helper.h" namespace onnxruntime { namespace openvino_ep { @@ -13,6 +14,7 @@ namespace openvino_ep { class IBackend { public: virtual void Infer(OrtKernelContext* context) = 0; + virtual ov::CompiledModel& GetOVCompiledModel() = 0; }; class BackendFactory { @@ -20,7 +22,8 @@ class BackendFactory { static std::shared_ptr MakeBackend(const ONNX_NAMESPACE::ModelProto& model_proto, GlobalContext& global_context, - const SubGraphContext& 
subgraph_context); + const SubGraphContext& subgraph_context, + EPCtxHandler& ctx_handle); }; } // namespace openvino_ep diff --git a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc new file mode 100644 index 0000000000000..cd1ae6150e1da --- /dev/null +++ b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc @@ -0,0 +1,123 @@ +// Copyright (C) Intel Corporation +// Licensed under the MIT License + +#include +#include +#include + +#include "core/providers/openvino/onnx_ctx_model_helper.h" + +namespace onnxruntime { +namespace openvino_ep { + +/* Export the serialized blob string embedded onto an EPContext Node + * along with other metadata necessary to validate the graph on import + */ + +Status EPCtxHandler::ExportEPCtxModel(const GraphViewer& graph_viewer, + const std::string& graph_name, + const logging::Logger& logger, + const bool& ep_context_embed_mode, + const std::string& model_blob_str, + const std::string& openvino_sdk_version, + const std::string& device_type) const { + auto model_build = graph_viewer.CreateModel(logger); + auto& graph_build = model_build->MainGraph(); + + // Get graph inputs and outputs + std::vector inputs, outputs; + for (auto input : graph_viewer.GetInputs()) { + auto& n_input = graph_build.GetOrCreateNodeArg(input->Name(), input->TypeAsProto()); + inputs.push_back(&n_input); + } + for (auto output : graph_viewer.GetOutputs()) { + auto& n_output = graph_build.GetOrCreateNodeArg(output->Name(), output->TypeAsProto()); + outputs.push_back(&n_output); + } + + // Create EP context node attributes + auto attr_0 = ONNX_NAMESPACE::AttributeProto::Create(); + auto attr_1 = ONNX_NAMESPACE::AttributeProto::Create(); + auto attr_2 = ONNX_NAMESPACE::AttributeProto::Create(); + auto attr_3 = ONNX_NAMESPACE::AttributeProto::Create(); + + // embed mode + attr_0->set_name(EMBED_MODE); + attr_0->set_type(onnx::AttributeProto_AttributeType_INT); + attr_0->set_i(ep_context_embed_mode); + // ep context + attr_1->set_name(EP_CACHE_CONTEXT); + attr_1->set_type(onnx::AttributeProto_AttributeType_STRING); + attr_1->set_s(model_blob_str); + // sdk version + attr_2->set_name(EP_SDK_VER); + attr_2->set_type(onnx::AttributeProto_AttributeType_STRING); + attr_2->set_s(openvino_sdk_version); + // source + attr_3->set_name(SOURCE); + attr_3->set_type(onnx::AttributeProto_AttributeType_STRING); + attr_3->set_s(kOpenVINOExecutionProvider); + + auto node_attributes = ONNX_NAMESPACE::NodeAttributes::Create(); + node_attributes->reserve(4); + node_attributes->emplace(EMBED_MODE, *attr_0); + node_attributes->emplace(EP_CACHE_CONTEXT, *attr_1); + node_attributes->emplace(EP_SDK_VER, *attr_2); + node_attributes->emplace(SOURCE, *attr_3); + + // Create EP context node + graph_build.AddNode(graph_name, EPCONTEXT_OP, "", inputs, outputs, node_attributes.get(), kMSDomain); + ORT_ENFORCE(graph_build.Resolve().IsOK()); + + // Serialize modelproto to string + auto new_graph_viewer = graph_build.CreateGraphViewer(); + auto model = new_graph_viewer->CreateModel(logger); + auto model_proto = model->ToProto(); + new_graph_viewer->ToProto(*model_proto->mutable_graph(), true, true); + model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION); + + // Finally, dump the model + std::ofstream dump(graph_name + "-ov_" + device_type + "_blob.onnx", + std::ios::out | std::ios::trunc | std::ios::binary); + model_proto->SerializeToOstream(dump); + + LOGS_DEFAULT(VERBOSE) << "[OpenVINO EP] Export blob as EPContext Node"; + + 
return Status::OK(); +} + +Status EPCtxHandler::ImportBlobFromEPCtxModel(const GraphViewer& graph_viewer) { + auto node = graph_viewer.GetNode(0); + auto& attrs = node->GetAttributes(); + ORT_ENFORCE(attrs.count(EP_CACHE_CONTEXT) > 0); + + model_stream_ = std::make_shared(attrs.at(EP_CACHE_CONTEXT).s()); + + LOGS_DEFAULT(VERBOSE) << "[OpenVINO EP] Read blob from EPContext Node"; + + is_valid_ep_ctx_graph_ = true; + return Status::OK(); +} + +bool EPCtxHandler::CheckForOVEPCtxNode(const GraphViewer& graph_viewer, std::string openvino_sdk_version) const { + for (int i = 0; i < graph_viewer.MaxNodeIndex(); ++i) { + auto node = graph_viewer.GetNode(i); + auto& attrs = node->GetAttributes(); + + // Check for correct Op Type, EP SOURCE, and SDK version + if (node != nullptr && node->OpType() == EPCONTEXT_OP) { + if (attrs.at(SOURCE).s() == kOpenVINOExecutionProvider) { + if (attrs.at(EP_SDK_VER).s() == openvino_sdk_version) { + return true; + } else { + ORT_THROW("[Invalid Graph] Versions of OpenVINO used to export blob (" + attrs.at(EP_SDK_VER).s() + + ") and current runtime (" + openvino_sdk_version + ") don't match."); + } + } + } + } + return false; +} + +} // namespace openvino_ep +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h new file mode 100644 index 0000000000000..b2b9b5bc53d44 --- /dev/null +++ b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h @@ -0,0 +1,45 @@ +// Copyright (C) Intel Corporation +// Licensed under the MIT License + +#pragma once + +#include +#include +#include + +#include "core/providers/shared_library/provider_api.h" + +namespace onnxruntime { +namespace openvino_ep { + +// Utilities to handle EPContext node export and parsing of an EPContext node +// to create the compiled_model object to infer on +static const char EPCONTEXT_OP[] = "EPContext"; +static const char EMBED_MODE[] = "embed_mode"; +static const char EP_CACHE_CONTEXT[] = "ep_cache_context"; +static const char EP_SDK_VER[] = "ep_sdk_version"; +static const char SOURCE[] = "source"; + +class EPCtxHandler { + public: + EPCtxHandler() = default; + EPCtxHandler(const EPCtxHandler&) = default; + Status ExportEPCtxModel(const GraphViewer& graph_viewer, + const std::string& graph_name, + const logging::Logger& logger, + const bool& ep_context_embed_mode, + const std::string& model_blob_str, + const std::string& openvino_sdk_version, + const std::string& device_type) const; + Status ImportBlobFromEPCtxModel(const GraphViewer& graph_viewer); + bool CheckForOVEPCtxNode(const GraphViewer& graph_viewer, std::string openvino_sdk_version) const; + bool IsValidOVEPCtxGraph() const { return is_valid_ep_ctx_graph_; } + [[nodiscard]] const std::shared_ptr GetModelBlobStream() const { return model_stream_; } + + private: + bool is_valid_ep_ctx_graph_{false}; + std::shared_ptr model_stream_; +}; + +} // namespace openvino_ep +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc index 913440d2fb6ea..656280114c3bd 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc @@ -1,11 +1,13 @@ // Copyright (C) Intel Corporation // Licensed under the MIT License +#include #include "core/providers/shared_library/provider_api.h" -#include "openvino_execution_provider.h" -#include "contexts.h" 
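The attribute names used by ExportEPCtxModel and ImportBlobFromEPCtxModel are the constants declared in onnx_ctx_model_helper.h, so an exported model can be inspected with the onnx Python package. Sketch with an illustrative file name:

```python
import onnx

# Attribute names come from onnx_ctx_model_helper.h; file name is illustrative.
model = onnx.load("model-ov_NPU_blob.onnx")
(node,) = model.graph.node  # GetCapability enforces exactly one EPContext node
assert node.op_type == "EPContext"

attrs = {a.name: a for a in node.attribute}
print(attrs["embed_mode"].i)        # 1 -> blob embedded in ep_cache_context
print(attrs["ep_sdk_version"].s)    # OpenVINO major.minor used at export time
print(attrs["source"].s)            # the exporting EP's name
# attrs["ep_cache_context"].s holds the blob bytes (embed mode) or a file path
```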
-#include "backend_manager.h" -#include "ov_versions/capability.h" +#include "core/providers/openvino/openvino_execution_provider.h" +#include "core/providers/openvino/contexts.h" +#include "core/providers/openvino/backend_manager.h" +#include "core/providers/openvino/onnx_ctx_model_helper.h" +#include "core/providers/openvino/ov_versions/capability.h" #include "openvino/core/version.hpp" #define MEMCPY_S(dest, src, destsz, srcsz) memcpy(dest, src, std::min(destsz, srcsz)) @@ -21,18 +23,19 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv global_context_->precision_str = info.precision_; global_context_->enable_npu_fast_compile = info.enable_npu_fast_compile_; global_context_->cache_dir = info.cache_dir_; + global_context_->model_priority = info.model_priority_; global_context_->num_streams = info.num_streams_; global_context_->context = info.context_; global_context_->enable_opencl_throttling = info.enable_opencl_throttling_; global_context_->disable_dynamic_shapes = info.disable_dynamic_shapes_; global_context_->num_of_threads = info.num_of_threads_; global_context_->OpenVINO_Version = {OPENVINO_VERSION_MAJOR, OPENVINO_VERSION_MINOR}; + global_context_->export_ep_ctx_blob = info.export_ep_ctx_blob_; // to check if target device is available // using ie_core capability GetAvailableDevices to fetch list of devices plugged in if (info.cache_dir_.empty()) { bool device_found = false; - bool device_id_found = false; auto available_devices = global_context_->ie_core.GetAvailableDevices(); // Checking for device_type configuration if (info.device_type_ != "") { @@ -40,15 +43,16 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv info.device_type_.find("MULTI") != std::string::npos || info.device_type_.find("AUTO") != std::string::npos) { device_found = true; - } else if (info.device_type_ == "CPU" || info.device_type_.find("GPU") != std::string::npos) { + } else { for (auto device : available_devices) { if (device.rfind(info.device_type_, 0) == 0) { if (info.device_type_.find("GPU") != std::string::npos && (info.precision_ == "FP32" || - info.precision_ == "FP16")) { + info.precision_ == "FP16" || + info.precision_ == "ACCURACY")) { device_found = true; break; } - if (info.device_type_ == "CPU" && (info.precision_ == "FP32" || info.precision_ == "FP16")) { + if (info.device_type_ == "CPU" && (info.precision_ == "FP32")) { device_found = true; break; } @@ -58,51 +62,31 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv } } } - } else { - device_found = true; } } if (!device_found) { - std::string err_msg = std::string("Device Type not found : ") + info.device_type_ + - "\nChoose the right precision with one of:\n"; - for (auto device : available_devices) { - err_msg = err_msg + device + "\n"; - } - ORT_THROW(err_msg); - } - // Checking for device_id configuration - if (info.device_id_ != "") { - for (auto device : available_devices) { - if (device.rfind(info.device_id_, 0) == 0) { - if (info.device_id_ == "CPU" || info.device_id_ == "GPU") { - LOGS_DEFAULT(INFO) << "[OpenVINO-EP]" - << "Switching to Device ID: " << info.device_id_; - device_id_found = true; - break; - } - } - } - if (!device_id_found) { - std::string err_msg = std::string("Device ID not found : ") + info.device_id_ + "\nChoose one of:\n"; - for (auto device : available_devices) { - err_msg = err_msg + device + "\n"; - } - ORT_THROW(err_msg); - } + ORT_THROW("[ERROR] [OpenVINO] Specified device - " + info.device_type_ + " is not 
available"); } } - global_context_->device_id = info.device_id_; } std::vector> OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer, const IKernelLookup& /*kernel_lookup*/) const { std::vector> result; + + std::string openvino_sdk_version = std::to_string(global_context_->OpenVINO_Version.at(0)) + "." + + std::to_string(global_context_->OpenVINO_Version.at(1)); + + // Check for valid ctx node and maintain state for validity + if (ep_ctx_handle_.CheckForOVEPCtxNode(graph_viewer, openvino_sdk_version)) + ORT_ENFORCE(graph_viewer.NumberOfNodes() == 1, + "[Invalid Graph] EPContext Model with OpenVINO compiled blob should not have more than one node."); + // Enable CI Logs if (!(GetEnvironmentVar("ORT_OPENVINO_ENABLE_CI_LOG").empty())) { std::cout << "In the OpenVINO EP" << std::endl; } - global_context_->onnx_model_name = graph_viewer.Name(); #ifdef _WIN32 std::wstring onnx_path = graph_viewer.ModelPath().ToPathString(); global_context_->onnx_model_path_name = @@ -114,9 +98,26 @@ OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer, global_context_->onnx_opset_version = graph_viewer.DomainToVersionMap().at(kOnnxDomain); + global_context_->model_precision = [&](const GraphViewer& graph_viewer) { + // return empty if graph has no inputs or if types are not one of FP32/FP16 + // else assume the type of the first input + if (graph_viewer.GetInputs().empty()) { + return ""; + } else { + auto input_type = graph_viewer.GetInputs()[0]->TypeAsProto()->tensor_type().elem_type(); + if (global_context_->precision_str == "ACCURACY" && global_context_->device_type == "GPU") { + if (input_type == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT) { + return "FP32"; + } else if (input_type == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16) { + return "FP16"; + } + } + } + return ""; + }(graph_viewer); + openvino_ep::GetCapability obj(graph_viewer, - global_context_->device_type, - global_context_->precision_str); + global_context_->device_type); result = obj.Execute(); global_context_->is_wholly_supported_graph = obj.IsWhollySupportedGraph(); @@ -135,8 +136,21 @@ common::Status OpenVINOExecutionProvider::Compile( global_context_->use_api_2 = true; + // During backend creation, we check if user wants to use precompiled blob onnx model or the original model + // For precompiled blob, directly load the model instead of compiling the model + // For original model, check if the user wants to export a model with pre-compiled blob + std::shared_ptr backend_manager = - std::make_shared(*global_context_, fused_node, graph_body_viewer, *GetLogger()); + std::make_shared(*global_context_, + fused_node, + graph_body_viewer, + *GetLogger(), + ep_ctx_handle_); + + if (global_context_->export_ep_ctx_blob && !ep_ctx_handle_.IsValidOVEPCtxGraph()) { + ORT_RETURN_IF_ERROR(backend_manager->ExportCompiledBlobAsEPCtxNode(graph_body_viewer, + *GetLogger())); + } compute_info.create_state_func = [backend_manager](ComputeContext* context, FunctionState* state) { diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.h b/onnxruntime/core/providers/openvino/openvino_execution_provider.h index b0dc881c36f33..75ffb807fe925 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.h +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.h @@ -9,8 +9,9 @@ #include #include #include +#include -#include "backend_manager.h" +#include "core/providers/openvino/backend_manager.h" namespace onnxruntime { @@ -60,52 
+61,54 @@ static std::vector parseDevices(const std::string& device_string) { // Information needed to construct OpenVINO execution providers. struct OpenVINOExecutionProviderInfo { - std::string device_type_; - std::string precision_; - bool enable_npu_fast_compile_; - std::string device_id_; - size_t num_of_threads_; - std::string cache_dir_; - int num_streams_; - void* context_; - bool enable_opencl_throttling_; - bool disable_dynamic_shapes_; - - explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_npu_fast_compile, std::string dev_id, - size_t num_of_threads, std::string cache_dir, int num_streams, - void* context, bool enable_opencl_throttling, - bool disable_dynamic_shapes) - : enable_npu_fast_compile_(enable_npu_fast_compile), - device_id_(dev_id), + std::string device_type_{""}; + std::string precision_{""}; + bool enable_npu_fast_compile_{false}; + size_t num_of_threads_{0}; + std::string cache_dir_{""}; + std::string model_priority_{""}; + int num_streams_{1}; + void* context_{NULL}; + bool enable_opencl_throttling_{false}; + bool disable_dynamic_shapes_{false}; + bool export_ep_ctx_blob_{false}; + + OpenVINOExecutionProviderInfo() = delete; + + explicit OpenVINOExecutionProviderInfo(std::string dev_type, std::string precision, bool enable_npu_fast_compile, + size_t num_of_threads, std::string cache_dir, std::string model_priority, + int num_streams, void* context, bool enable_opencl_throttling, + bool disable_dynamic_shapes, bool export_ep_ctx_blob) + : precision_(precision), + enable_npu_fast_compile_(enable_npu_fast_compile), num_of_threads_(num_of_threads), cache_dir_(cache_dir), + model_priority_(model_priority), num_streams_(num_streams), context_(context), enable_opencl_throttling_(enable_opencl_throttling), - disable_dynamic_shapes_(disable_dynamic_shapes) { + disable_dynamic_shapes_(disable_dynamic_shapes), + export_ep_ctx_blob_(export_ep_ctx_blob) { + std::set ov_supported_device_types = {"CPU", "GPU", + "GPU.0", "GPU.1", "NPU"}; if (dev_type == "") { LOGS_DEFAULT(INFO) << "[OpenVINO-EP]" << "No runtime device selection option provided."; -#if defined OPENVINO_CONFIG_CPU_FP32 +#if defined OPENVINO_CONFIG_CPU device_type_ = "CPU"; precision_ = "FP32"; -#elif defined OPENVINO_CONFIG_CPU_FP16 - device_type_ = "CPU"; - precision_ = "FP16"; -#elif defined OPENVINO_CONFIG_GPU_FP32 - device_type_ = "GPU"; - precision_ = "FP32"; -#elif defined OPENVINO_CONFIG_GPU_FP16 +#elif defined OPENVINO_CONFIG_GPU device_type_ = "GPU"; precision_ = "FP16"; #elif defined OPENVINO_CONFIG_NPU device_type_ = "NPU"; - precision_ = ""; + precision_ = "FP16"; #elif defined OPENVINO_CONFIG_HETERO || defined OPENVINO_CONFIG_MULTI || defined OPENVINO_CONFIG_AUTO #ifdef DEVICE_NAME #define DEVICE DEVICE_NAME #endif dev_type = DEVICE; + if (dev_type.find("HETERO") == 0 || dev_type.find("MULTI") == 0 || dev_type.find("AUTO") == 0) { std::vector devices = parseDevices(dev_type); precision_ = "FP16"; @@ -115,33 +118,8 @@ struct OpenVINOExecutionProviderInfo { device_type_ = dev_type; } #endif - } else if (dev_type == "CPU_FP32") { - device_type_ = "CPU"; - precision_ = "FP32"; - } else if (dev_type == "CPU_FP16") { - device_type_ = "CPU"; - precision_ = "FP16"; - } else if (dev_type == "GPU_FP32") { - device_type_ = "GPU"; - precision_ = "FP32"; - } else if (dev_type == "GPU.0_FP32") { - device_type_ = "GPU.0"; - precision_ = "FP32"; - } else if (dev_type == "GPU.1_FP32") { - device_type_ = "GPU.1"; - precision_ = "FP32"; - } else if (dev_type == "GPU_FP16") { - device_type_ = "GPU"; 
- precision_ = "FP16"; - } else if (dev_type == "GPU.0_FP16") { - device_type_ = "GPU.0"; - precision_ = "FP16"; - } else if (dev_type == "GPU.1_FP16") { - device_type_ = "GPU.1"; - precision_ = "FP16"; - } else if (dev_type == "NPU") { - device_type_ = "NPU"; - precision_ = ""; + } else if (ov_supported_device_types.find(dev_type) != ov_supported_device_types.end()) { + device_type_ = dev_type; } else if (dev_type.find("HETERO") == 0 || dev_type.find("MULTI") == 0) { std::vector devices = parseDevices(dev_type); precision_ = "FP16"; @@ -159,9 +137,6 @@ struct OpenVINOExecutionProviderInfo { LOGS_DEFAULT(INFO) << "[OpenVINO-EP]" << "Choosing Device: " << device_type_ << " , Precision: " << precision_; } - OpenVINOExecutionProviderInfo() { - OpenVINOExecutionProviderInfo("", false, "", 0, "", 1, NULL, false, false); - } }; struct OpenVINOEPFunctionState { @@ -190,6 +165,7 @@ class OpenVINOExecutionProvider : public IExecutionProvider { private: std::unique_ptr global_context_; + openvino_ep::EPCtxHandler ep_ctx_handle_{}; }; } // namespace onnxruntime diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc index 17511c54aab86..0ba1f50cad54f 100644 --- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc +++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc @@ -8,18 +8,22 @@ namespace onnxruntime { struct OpenVINOProviderFactory : IExecutionProviderFactory { - OpenVINOProviderFactory(const char* device_type, bool enable_npu_fast_compile, - const char* device_id, size_t num_of_threads, - const char* cache_dir, int num_streams, void* context, - bool enable_opencl_throttling, bool disable_dynamic_shapes) - : enable_npu_fast_compile_(enable_npu_fast_compile), + OpenVINOProviderFactory(const char* device_type, const char* precision, + bool enable_npu_fast_compile, size_t num_of_threads, + const char* cache_dir, const char* model_priority, + int num_streams, void* context, + bool enable_opencl_throttling, bool disable_dynamic_shapes, + bool export_ep_ctx_blob) + : precision_(precision), + enable_npu_fast_compile_(enable_npu_fast_compile), num_of_threads_(num_of_threads), + model_priority_(model_priority), num_streams_(num_streams), context_(context), enable_opencl_throttling_(enable_opencl_throttling), - disable_dynamic_shapes_(disable_dynamic_shapes) { + disable_dynamic_shapes_(disable_dynamic_shapes), + export_ep_ctx_blob_(export_ep_ctx_blob) { device_type_ = (device_type == nullptr) ? "" : device_type; - device_id_ = (device_id == nullptr) ? "" : device_id; cache_dir_ = (cache_dir == nullptr) ? 
"" : cache_dir; } ~OpenVINOProviderFactory() override { @@ -29,20 +33,22 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory { private: std::string device_type_; + std::string precision_; bool enable_npu_fast_compile_; - std::string device_id_; size_t num_of_threads_; std::string cache_dir_; + std::string model_priority_; int num_streams_; void* context_; bool enable_opencl_throttling_; bool disable_dynamic_shapes_; + bool export_ep_ctx_blob_; }; std::unique_ptr OpenVINOProviderFactory::CreateProvider() { - OpenVINOExecutionProviderInfo info(device_type_, enable_npu_fast_compile_, device_id_, num_of_threads_, - cache_dir_, num_streams_, context_, enable_opencl_throttling_, - disable_dynamic_shapes_); + OpenVINOExecutionProviderInfo info(device_type_, precision_, enable_npu_fast_compile_, num_of_threads_, + cache_dir_, model_priority_, num_streams_, context_, enable_opencl_throttling_, + disable_dynamic_shapes_, export_ep_ctx_blob_); return std::make_unique(info); } @@ -62,44 +68,94 @@ struct OpenVINO_Provider : Provider { std::shared_ptr CreateExecutionProviderFactory(const void* void_params) override { auto& provider_options_map = *reinterpret_cast(void_params); - std::string device_type = ""; // [device_type]: Overrides the accelerator hardware type and precision - // with these values at runtime. - bool enable_npu_fast_compile = false; // [enable_npu_fast_compile]: Fast-compile may be optionally enabled to - // speeds up the model's compilation to NPU device specific format. - const char* device_id = ""; // [device_id]: Selects a particular hardware device for inference. - int num_of_threads = 0; // [num_of_threads]: Overrides the accelerator default value of number of - // threads with this value at runtime. - const char* cache_dir = ""; // [cache_dir]: specify the path to - // dump and load the blobs for the model caching/kernel caching (GPU) - // feature. If blob files are already present, it will be directly loaded. - int num_streams = 1; // [num_streams]: Option that specifies the number of parallel inference - // requests to be processed on a given `device_type`. Overrides the - // accelerator default value of number of streams - // with this value at runtime. - bool enable_opencl_throttling = false; // [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU - // device (Reduces CPU Utilization when using GPU) + std::string device_type = ""; // [device_type]: Overrides the accelerator hardware type and precision + // with these values at runtime. + std::string precision = ""; // [precision]: Sets the inference precision for execution. + // Supported precision for devices are CPU=FP32, GPU=FP32,FP16, NPU=FP16. + // Not setting precision will execute with optimized precision for + // best inference latency. set Precision=ACCURACY for executing models + // with input precision for best accuracy. + bool enable_npu_fast_compile = false; // [enable_npu_fast_compile]: Fast-compile may be optionally enabled to + // speeds up the model's compilation to NPU device specific format. + int num_of_threads = 0; // [num_of_threads]: Overrides the accelerator default value of number of + // threads with this value at runtime. + const char* cache_dir = ""; // [cache_dir]: specify the path to + // dump and load the blobs for the model caching/kernel caching (GPU) + // feature. If blob files are already present, it will be directly loaded. 
+ const char* model_priority = "DEFAULT"; // High-level OpenVINO model priority hint + // Defines what model should be provided with more performant + // bounded resource first + int num_streams = 1; // [num_streams]: Option that specifies the number of parallel inference + // requests to be processed on a given `device_type`. Overrides the + // accelerator default value of number of streams + // with this value at runtime. + bool enable_opencl_throttling = false; // [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU + // device (Reduces CPU Utilization when using GPU) + bool export_ep_ctx_blob = false; // Whether to export the pre-compiled blob as an EPContext model. + void* context = nullptr; if (provider_options_map.find("device_type") != provider_options_map.end()) { device_type = provider_options_map.at("device_type").c_str(); - std::set ov_supported_device_types = {"CPU_FP32", "CPU_FP16", "GPU_FP32", - "GPU.0_FP32", "GPU.1_FP32", "GPU_FP16", - "GPU.0_FP16", "GPU.1_FP16", "NPU"}; + std::set ov_supported_device_types = {"CPU", "GPU", + "GPU.0", "GPU.1", "NPU"}; + std::set deprecated_device_types = {"CPU_FP32", "GPU_FP32", + "GPU.0_FP32", "GPU.1_FP32", "GPU_FP16", + "GPU.0_FP16", "GPU.1_FP16"}; + if (deprecated_device_types.find(device_type) != deprecated_device_types.end()) { + std::string deprecated_device = device_type; + int delimit = device_type.find("_"); + device_type = deprecated_device.substr(0, delimit); + precision = deprecated_device.substr(delimit + 1); + LOGS_DEFAULT(WARNING) << "[OpenVINO] Selected 'device_type' " + deprecated_device + " is deprecated. \n" + << "Update the 'device_type' to specified types 'CPU', 'GPU', 'GPU.0', " + << "'GPU.1', 'NPU' or from" + << " HETERO/MULTI/AUTO options and set 'precision' separately. \n"; + } if (!((ov_supported_device_types.find(device_type) != ov_supported_device_types.end()) || (device_type.find("HETERO:") == 0) || (device_type.find("MULTI:") == 0) || (device_type.find("AUTO:") == 0))) { ORT_THROW( "[ERROR] [OpenVINO] You have selcted wrong configuration value for the key 'device_type'. " - "Select from 'CPU_FP32', 'CPU_FP16', 'GPU_FP32', 'GPU.0_FP32', 'GPU.1_FP32', 'GPU_FP16', " - "'GPU.0_FP16', 'GPU.1_FP16', 'NPU' or from" + "Select from 'CPU', 'GPU', 'GPU.0', 'GPU.1', 'NPU' or from" " HETERO/MULTI/AUTO options available. \n"); } } if (provider_options_map.find("device_id") != provider_options_map.end()) { - device_id = provider_options_map.at("device_id").c_str(); + std::string dev_id = provider_options_map.at("device_id").c_str(); + LOGS_DEFAULT(WARNING) << "[OpenVINO] The options 'device_id' is deprecated. " + << "Upgrade to set deice_type and precision session options.\n"; + if (dev_id == "CPU" || dev_id == "GPU" || dev_id == "NPU") { + device_type = dev_id; + } else { + ORT_THROW("[ERROR] [OpenVINO] Unsupported device_id is selected. Select from available options."); + } } + if (provider_options_map.find("precision") != provider_options_map.end()) { + precision = provider_options_map.at("precision").c_str(); + } + if (device_type == "CPU") { + if (precision == "" || precision == "ACCURACY" || precision == "FP32") { + precision = "FP32"; + } else { + ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. CPU only supports FP32 . \n"); + } + } else if (device_type == "NPU") { + if (precision == "" || precision == "ACCURACY" || precision == "FP16") { + precision = "FP16"; + } else { + ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. NPU only supported FP16. 
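The precision handling here reduces to a small table: CPU accepts only FP32, NPU only FP16, and GPU accepts FP32/FP16/ACCURACY with FP16 as the default. A Python sketch of that resolution (a mirror of the checks, not the EP code):

```python
def resolve_precision(device_type: str, precision: str = "") -> str:
    """Sketch mirroring the validation in this change; not the EP source."""
    if device_type == "CPU":
        if precision in ("", "ACCURACY", "FP32"):
            return "FP32"
        raise ValueError("CPU only supports FP32")
    if device_type == "NPU":
        if precision in ("", "ACCURACY", "FP16"):
            return "FP16"
        raise ValueError("NPU only supports FP16")
    if device_type == "GPU":
        if precision == "":
            return "FP16"
        if precision in ("FP32", "FP16", "ACCURACY"):
            return precision
        raise ValueError("GPU only supports FP32 / FP16 / ACCURACY")
    return precision  # HETERO/MULTI/AUTO and GPU.x handled elsewhere

assert resolve_precision("CPU") == "FP32"
assert resolve_precision("GPU", "ACCURACY") == "ACCURACY"
```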
\n"); + } + } else if (device_type == "GPU") { + if (precision == "") { + precision = "FP16"; + } else if (precision != "ACCURACY" && precision != "FP16" && precision != "FP32") { + ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. GPU only supports FP32 / FP16. \n"); + } + } + if (provider_options_map.find("cache_dir") != provider_options_map.end()) { cache_dir = provider_options_map.at("cache_dir").c_str(); } @@ -119,6 +175,18 @@ struct OpenVINO_Provider : Provider { } } + if (provider_options_map.find("model_priority") != provider_options_map.end()) { + model_priority = provider_options_map.at("model_priority").c_str(); + std::vector supported_priorities({"LOW", "MEDIUM", "HIGH", "DEFAULT"}); + if (std::find(supported_priorities.begin(), supported_priorities.end(), + model_priority) == supported_priorities.end()) { + model_priority = "DEFAULT"; + LOGS_DEFAULT(WARNING) << "[OpenVINO-EP] The value for the key 'model_priority' " + << "is not one of LOW, MEDIUM, HIGH, DEFAULT. " + << "Executing with model_priorty=DEFAULT"; + } + } + if (provider_options_map.find("num_streams") != provider_options_map.end()) { num_streams = std::stoi(provider_options_map.at("num_streams")); if (num_streams <= 0) { @@ -154,26 +222,38 @@ struct OpenVINO_Provider : Provider { } if (provider_options_map.find("disable_dynamic_shapes") != provider_options_map.end()) { bool_flag = provider_options_map.at("disable_dynamic_shapes"); - if (bool_flag == "true" || bool_flag == "True") + if (bool_flag == "true" || bool_flag == "True") { disable_dynamic_shapes = true; - else if (bool_flag == "false" || bool_flag == "False") { + } else if (bool_flag == "false" || bool_flag == "False") { if (device_type.find("NPU") != std::string::npos) { disable_dynamic_shapes = true; - LOGS_DEFAULT(INFO) << "[OpenVINO-EP] The value for the key 'disable_dynamic_shapes' will be set to TRUE for NPU backend.\n "; + LOGS_DEFAULT(INFO) << "[OpenVINO-EP] The value for the key 'disable_dynamic_shapes' will be set to " + << "TRUE for NPU backend.\n "; } else { disable_dynamic_shapes = false; } } } + + if (provider_options_map.find("export_ep_ctx_blob") != provider_options_map.end()) { + bool_flag = provider_options_map.at("export_ep_ctx_blob"); + if (bool_flag == "true" || bool_flag == "True") + export_ep_ctx_blob = true; + else if (bool_flag == "false" || bool_flag == "False") + export_ep_ctx_blob = false; + bool_flag = ""; + } return std::make_shared(const_cast(device_type.c_str()), + const_cast(precision.c_str()), enable_npu_fast_compile, - device_id, num_of_threads, cache_dir, + model_priority, num_streams, context, enable_opencl_throttling, - disable_dynamic_shapes); + disable_dynamic_shapes, + export_ep_ctx_blob); } void Initialize() override { diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc index d7c6654c90f81..1ada1e1cc9d17 100644 --- a/onnxruntime/core/providers/openvino/ov_interface.cc +++ b/onnxruntime/core/providers/openvino/ov_interface.cc @@ -1,12 +1,12 @@ // Copyright (C) Intel Corporation // Licensed under the MIT License -#include "ov_interface.h" -#include +#include "core/providers/openvino/ov_interface.h" + #define ORT_API_MANUAL_INIT #include "core/session/onnxruntime_cxx_api.h" #include "core/providers/shared_library/provider_api.h" -#include "backend_utils.h" +#include "core/providers/openvino/backend_utils.h" using Exception = ov::Exception; @@ -14,6 +14,38 @@ namespace onnxruntime { namespace openvino_ep { const std::string 
log_tag = "[OpenVINO-EP] "; + +#ifndef NDEBUG +void printDebugInfo(const ov::CompiledModel& obj) { + if (onnxruntime::openvino_ep::backend_utils::IsDebugEnabled()) { + // output of the actual settings that the device selected + auto supported_properties = obj.get_property(ov::supported_properties); + std::cout << "Model:" << std::endl; + for (const auto& cfg : supported_properties) { + if (cfg == ov::supported_properties) + continue; + auto prop = obj.get_property(cfg); + if (cfg == ov::device::properties) { + auto devices_properties = prop.as(); + for (auto& item : devices_properties) { + std::cout << " " << item.first << ": " << std::endl; + for (auto& item2 : item.second.as()) { + OPENVINO_SUPPRESS_DEPRECATED_START + if (item2.first == ov::supported_properties || item2.first == "SUPPORTED_CONFIG_KEYS)" || + item2.first == "SUPPORTED_METRICS") + continue; + OPENVINO_SUPPRESS_DEPRECATED_END + std::cout << " " << item2.first << ": " << item2.second.as() << std::endl; + } + } + } else { + std::cout << " " << cfg << ": " << prop.as() << std::endl; + } + } + } +} +#endif + std::shared_ptr OVCore::ReadModel(const std::string& model, const std::string& model_path) const { try { std::istringstream modelStringStream(model); @@ -37,41 +69,42 @@ std::shared_ptr OVCore::ReadModel(const std::string& model, const std } } -OVExeNetwork OVCore::LoadNetwork(std::shared_ptr& ie_cnn_network, - std::string& hw_target, - ov::AnyMap& device_config, - std::string name) { +OVExeNetwork OVCore::CompileModel(std::shared_ptr& ie_cnn_network, + std::string& hw_target, + ov::AnyMap& device_config, + std::string name) { ov::CompiledModel obj; try { obj = oe.compile_model(ie_cnn_network, hw_target, device_config); - #ifndef NDEBUG - if (onnxruntime::openvino_ep::backend_utils::IsDebugEnabled()) { - // output of the actual settings that the device selected - auto supported_properties = obj.get_property(ov::supported_properties); - std::cout << "Model:" << std::endl; - for (const auto& cfg : supported_properties) { - if (cfg == ov::supported_properties) - continue; - auto prop = obj.get_property(cfg); - if (cfg == ov::device::properties) { - auto devices_properties = prop.as(); - for (auto& item : devices_properties) { - std::cout << " " << item.first << ": " << std::endl; - for (auto& item2 : item.second.as()) { - OPENVINO_SUPPRESS_DEPRECATED_START - if (item2.first == ov::supported_properties || item2.first == "SUPPORTED_CONFIG_KEYS)" || - item2.first == "SUPPORTED_METRICS") - continue; - OPENVINO_SUPPRESS_DEPRECATED_END - std::cout << " " << item2.first << ": " << item2.second.as() << std::endl; - } - } - } else { - std::cout << " " << cfg << ": " << prop.as() << std::endl; - } - } + printDebugInfo(obj); +#endif + OVExeNetwork exe(obj); + return exe; + } catch (const Exception& e) { + ORT_THROW(log_tag + " Exception while Loading Network for graph: " + name + e.what()); + } catch (...) 
{ + ORT_THROW(log_tag + " Exception while Loading Network for graph " + name); + } +} + +OVExeNetwork OVCore::CompileModel(const std::string onnx_model_path, + std::string& hw_target, + std::string cache_dir, + ov::AnyMap& device_config, + std::string name) { + ov::CompiledModel obj; + try { + if (hw_target == "AUTO:GPU,CPU") { + obj = oe.compile_model(onnx_model_path, + "AUTO", + ov::device::priorities("GPU", "CPU"), + ov::device::properties("GPU", ov::cache_dir(cache_dir))); + } else { + obj = oe.compile_model(onnx_model_path, hw_target, device_config); } +#ifndef NDEBUG + printDebugInfo(obj); #endif OVExeNetwork exe(obj); return exe; @@ -82,13 +115,15 @@ OVExeNetwork OVCore::LoadNetwork(std::shared_ptr& ie_cnn_network, } } -OVExeNetwork OVCore::LoadNetwork(const std::string onnx_model_path, +OVExeNetwork OVCore::ImportModel(std::shared_ptr model_stream, std::string& hw_target, ov::AnyMap& device_config, std::string name) { - ov::CompiledModel obj; try { - obj = oe.compile_model(onnx_model_path, hw_target, device_config); + auto obj = oe.import_model(*model_stream, hw_target, device_config); +#ifndef NDEBUG + printDebugInfo(obj); +#endif OVExeNetwork exe(obj); return exe; } catch (const Exception& e) { @@ -98,14 +133,20 @@ OVExeNetwork OVCore::LoadNetwork(const std::string onnx_model_path, } } -void OVCore::SetCache(std::string cache_dir_path) { - oe.set_property(ov::cache_dir(cache_dir_path)); +void OVCore::SetCache(std::string cache_dir_path, std::string device_type) { + if (device_type == "AUTO:GPU,CPU") { + oe.set_property(ov::cache_dir(cache_dir_path)); + } } #ifdef IO_BUFFER_ENABLED -OVExeNetwork OVCore::LoadNetwork(std::shared_ptr& model, OVRemoteContextPtr context, std::string& name) { +OVExeNetwork OVCore::CompileModel(std::shared_ptr& model, + OVRemoteContextPtr context, std::string& name) { try { auto obj = oe.compile_model(model, *context); +#ifndef NDEBUG + printDebugInfo(obj); +#endif return OVExeNetwork(obj); } catch (const Exception& e) { ORT_THROW(log_tag + " Exception while Loading Network for graph: " + name + e.what()); @@ -113,6 +154,21 @@ OVExeNetwork OVCore::LoadNetwork(std::shared_ptr& model, OVRemoteCont ORT_THROW(log_tag + " Exception while Loading Network for graph " + name); } } +OVExeNetwork OVCore::ImportModel(std::shared_ptr model_stream, + OVRemoteContextPtr context, std::string& name) { + try { + auto obj = oe.import_model(*model_stream, *context); +#ifndef NDEBUG + printDebugInfo(obj); +#endif + OVExeNetwork exe(obj); + return exe; + } catch (const Exception& e) { + ORT_THROW(log_tag + " Exception while Loading Network for graph: " + name + e.what()); + } catch (...) 
{ + ORT_THROW(log_tag + " Exception while Loading Network for graph " + name); + } +} #endif std::vector OVCore::GetAvailableDevices() { diff --git a/onnxruntime/core/providers/openvino/ov_interface.h b/onnxruntime/core/providers/openvino/ov_interface.h index 2a13fafb99fd3..f61d3608574da 100644 --- a/onnxruntime/core/providers/openvino/ov_interface.h +++ b/onnxruntime/core/providers/openvino/ov_interface.h @@ -5,6 +5,8 @@ #include #include +#include +#include #include "openvino/openvino.hpp" #include "openvino/pass/convert_fp32_to_fp16.hpp" @@ -38,22 +40,26 @@ class OVCore { public: std::shared_ptr ReadModel(const std::string& model_stream, const std::string& model_path) const; - OVExeNetwork LoadNetwork(std::shared_ptr& ie_cnn_network, + OVExeNetwork CompileModel(std::shared_ptr& ie_cnn_network, + std::string& hw_target, + ov::AnyMap& device_config, + std::string name); + OVExeNetwork CompileModel(const std::string onnx_model_path, + std::string& hw_target, + std::string cache_dir, + ov::AnyMap& device_config, + std::string name); + OVExeNetwork ImportModel(std::shared_ptr model_stream, std::string& hw_target, ov::AnyMap& device_config, std::string name); - OVExeNetwork LoadNetwork(const std::string model_path, - std::string& hw_target, - ov::AnyMap& device_config, - std::string name); - void SetCache(std::string cache_dir_path); #ifdef IO_BUFFER_ENABLED - OVExeNetwork LoadNetwork(std::shared_ptr& model, OVRemoteContextPtr context, std::string& name); + OVExeNetwork CompileModel(std::shared_ptr& model, OVRemoteContextPtr context, std::string& name); + OVExeNetwork ImportModel(std::shared_ptr model_stream, OVRemoteContextPtr context, std::string& name); #endif std::vector GetAvailableDevices(); - ov::Core& Get() { - return oe; - } + void SetCache(std::string cache_dir_path, std::string device_type); + ov::Core& Get() { return oe; } void SetStreams(const std::string& device_type, int num_streams); }; @@ -61,8 +67,8 @@ class OVExeNetwork { ov::CompiledModel obj; public: - explicit OVExeNetwork(ov::CompiledModel md) { obj = md; } - OVExeNetwork() { obj = ov::CompiledModel(); } + explicit OVExeNetwork(ov::CompiledModel md) : obj(md) {} + OVExeNetwork() : obj(ov::CompiledModel()) {} ov::CompiledModel& Get() { return obj; } OVInferRequest CreateInferRequest(); }; @@ -77,8 +83,8 @@ class OVInferRequest { void Infer(); void WaitRequest(); void QueryStatus(); - explicit OVInferRequest(ov::InferRequest obj) { ovInfReq = obj; } - OVInferRequest() { ovInfReq = ov::InferRequest(); } + explicit OVInferRequest(ov::InferRequest obj) : ovInfReq(obj) {} + OVInferRequest() : ovInfReq(ov::InferRequest()) {} ov::InferRequest& GetNewObj() { return ovInfReq; } diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability.cc b/onnxruntime/core/providers/openvino/ov_versions/capability.cc index 3970bf6ff68a7..714d5b03baae3 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/capability.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/capability.cc @@ -1,11 +1,13 @@ // Copyright (C) 2019- Intel Corporation // Licensed under the MIT License +#include +#include #include "core/providers/shared_library/provider_api.h" -#include "../backend_utils.h" -#include "../backend_manager.h" -#include "capability.h" -#include "utils.h" +#include "core/providers/openvino/backend_utils.h" +#include "core/providers/openvino/backend_manager.h" +#include "core/providers/openvino/ov_versions/capability.h" +#include "core/providers/openvino/ov_versions/utils.h" #include "openvino/core/version.hpp" #if 
defined(_MSC_VER) @@ -25,22 +27,23 @@ namespace openvino_ep { // Constructor GetCapability::GetCapability(const GraphViewer& graph_viewer_param, - const std::string device_type_param, - const std::string device_precision) - : graph_viewer_(graph_viewer_param), device_type_(device_type_param), device_precision_(device_precision) { + const std::string device_type_param) + : graph_viewer_(graph_viewer_param), device_type_(device_type_param) { if (device_type_.find("NPU") != std::string::npos) { - device_type_ = "CPU_FP32"; + device_type_ = "CPU"; } #if OPENVINO_VERSION_MAJOR == 2023 && OPENVINO_VERSION_MINOR == 1 - data_ops_ = new DataOps(graph_viewer_, V_2023_1, device_type_, device_precision_); + data_ops_ = new DataOps(graph_viewer_, V_2023_1, device_type_); #elif OPENVINO_VERSION_MAJOR == 2023 && OPENVINO_VERSION_MINOR == 2 - data_ops_ = new DataOps(graph_viewer_, V_2023_2, device_type_, device_precision_); + data_ops_ = new DataOps(graph_viewer_, V_2023_2, device_type_); #elif OPENVINO_VERSION_MAJOR == 2023 && OPENVINO_VERSION_MINOR == 3 - data_ops_ = new DataOps(graph_viewer_, V_2023_3, device_type_, device_precision_); + data_ops_ = new DataOps(graph_viewer_, V_2023_3, device_type_); #elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 0 - data_ops_ = new DataOps(graph_viewer_, V_2024_0, device_type_, device_precision_); + data_ops_ = new DataOps(graph_viewer_, V_2024_0, device_type_); +#elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 1 + data_ops_ = new DataOps(graph_viewer_, V_2024_1, device_type_); #else - data_ops_ = new DataOps(graph_viewer_, V_2024_0, device_type_, device_precision_); + data_ops_ = new DataOps(graph_viewer_, V_2024_1, device_type_); #endif } diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability.h b/onnxruntime/core/providers/openvino/ov_versions/capability.h index d9fe5a95ef833..a908bf26247fb 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/capability.h +++ b/onnxruntime/core/providers/openvino/ov_versions/capability.h @@ -5,7 +5,7 @@ #include #include #include -#include "data_ops.h" +#include "core/providers/openvino/ov_versions/data_ops.h" namespace onnxruntime { namespace openvino_ep { @@ -14,14 +14,12 @@ class GetCapability { private: const GraphViewer& graph_viewer_; std::string device_type_; - std::string device_precision_; DataOps* data_ops_; bool is_wholly_supported_graph_ = false; public: GetCapability(const GraphViewer& graph_viewer_param, - const std::string device_type_param, - const std::string precision); + const std::string device_type_param); virtual std::vector> Execute(); bool IsWhollySupportedGraph() { return is_wholly_supported_graph_; diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc index c7c3e93595719..5d7956f6fb559 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc @@ -9,12 +9,12 @@ #include #include "core/providers/shared_library/provider_api.h" -#include "../backend_utils.h" -#include "../backend_manager.h" -#include "data_ops.h" -#include "capability.h" -#include "utils.h" -#include "../ov_interface.h" +#include "core/providers/openvino/backend_utils.h" +#include "core/providers/openvino/backend_manager.h" +#include "core/providers/openvino/ov_interface.h" +#include "core/providers/openvino/ov_versions/data_ops.h" +#include "core/providers/openvino/ov_versions/capability.h" +#include 
"core/providers/openvino/ov_versions/utils.h" #if defined(_MSC_VER) #pragma warning(disable : 4244 4245 5208) @@ -122,6 +122,7 @@ std::vector supported_op_mode = { {"Dropout", V_2020_4, {"CPU", "GPU"}}, {"Elu", V_2020_4, {"CPU", "GPU"}}, {"Einsum", V_2023_1, {"CPU", "GPU"}}, + {"EPContext", V_2024_0, {"CPU", "GPU", "NPU"}}, {"Equal", V_2020_4, {"CPU", "GPU"}}, {"Erf", V_2020_4, {"CPU", "GPU"}}, {"Exp", V_2020_4, {"CPU", "GPU"}}, @@ -360,238 +361,22 @@ void DataOps::populate_op_mode_supported() { // populate unsupportedmode_t { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, + UnsupportedOpMode obj = {{V_2024_1}, [this](const Node* node, const InitializedTensorSet&) { - // Abs is not supproted with INT8 or INT32 as input data type on GPU - if ((device_id_.find("GPU") != std::string::npos)) { - for (size_t i = 0; i < node->InputDefs().size(); i++) { - if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == - ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8 || - node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == - ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32) - return true; - } - } - return false; - }}; - op_list_.insert({"Abs", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet&) { - // tensor type does not support select last index - auto& attributes = node->GetAttributes(); - auto last_index_arg = - attributes.count("select_last_index") > 0 ? attributes.at("select_last_index").i() - : 0; - if (last_index_arg != 0) - return true; - // tensor type supports float as input for argmax and argmin - if (node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type() != - ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT) - return true; - return false; - }}; - op_list_.insert({"ArgMax", obj}); - op_list_.insert({"ArgMin", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet&) { - if (device_id_.find("GPU") != std::string::npos) { - // int64 data type is not supported on GPU - const bool data_is_int64 = - node->InputDefs()[0]->Type()->find("int64") != std::string::npos; - return data_is_int64; - } - return false; - }}; - op_list_.insert({"Clip", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet&) { - if (device_id_.find("GPU") != std::string::npos) { - bool if_bias = false; - const auto& attributes = node->GetAttributes(); - auto conv_filter = attributes.find("kernel_shape"); - if (conv_filter != attributes.end()) { - auto& ints = conv_filter->second().ints(); - // check if the Input for the op has bias - if (node->InputDefs().size() > 2) { - if (node->InputDefs()[2]->Name() == "B") - if_bias = true; - } - // If the kernel size is 3D and the input doesnot have bias, - // the op is rejected in case of GPU - if (ints.size() == 3 && !if_bias) - return true; - } - } - return false; - }}; - op_list_.insert({"Conv", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet&) { - if (device_id_.find("GPU") != std::string::npos) { - // If the device is GPU, only 2D dilations with 1x1 pixel are supported - const auto& attributes = node->GetAttributes(); - auto dilation = attributes.find("dilations"); - if (dilation != attributes.end()) { - auto& dilation_attr = attributes.at("dilations"); - auto int_size = 
dilation_attr.ints_size(); - if (int_size == 2) { - if (dilation_attr.ints(0) != 1 || dilation_attr.ints(1) != 1) { - return true; - } - } - // If 3D dilations, reject the op - if (int_size == 3) - return true; - } - auto group_attr = attributes.find("group"); - // group 4 is not supported - if (group_attr->second().i() == 4) - return true; - } - return false; - }}; - op_list_.insert({"ConvTranspose", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_3}, - [this](const Node* node, const InitializedTensorSet&) { - if (device_id_.find("GPU") != std::string::npos && node->OpType() == "If") { - // Only Equal op is supported as input for IF op in GPU - for (auto nit = node->InputNodesBegin(); nit != node->InputNodesEnd(); ++nit) { - if (nit->OpType() == "Equal") { - return false; - } - } - } - return true; - }}; - op_list_.insert({"If", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet&) { - const auto& attributes = node->GetAttributes(); - // dilations attrs are not supported yet for Maxpool - if (attributes.find("dilations") != attributes.end()) - return true; - return (!this->dimension_unsupported(node)); - }}; - op_list_.insert({"MaxPool", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet&) { - if (device_id_.find("GPU") != std::string::npos) { - auto x_data_type = node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type(); - auto y_data_type = node->InputDefs()[1]->TypeAsProto()->tensor_type().elem_type(); - // currently both inputs with int32 are not supported - // and also both input datatypes should be same - const bool A_is_int32 = - node->InputDefs()[0]->Type()->find("int32") != std::string::npos; - const bool B_is_int32 = - node->InputDefs()[1]->Type()->find("int32") != std::string::npos; - if ((A_is_int32 && B_is_int32) || (x_data_type != y_data_type)) + // If the Input of ReduceMax op is UINT8, it is rejected (Due to output mismatch) + for (size_t i = 0; i < node->InputDefs().size(); i++) { + if ((node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == + ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8) || + (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == + ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8)) return true; } return false; }}; - op_list_.insert({"Mod", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet&) { - if (device_id_.find("GPU") != std::string::npos) { - auto x_data_type = node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type(); - auto y_data_type = node->InputDefs()[1]->TypeAsProto()->tensor_type().elem_type(); - return x_data_type != y_data_type; - } - // currently both inputs with int32 or int64 datatype are not supported - const bool A_is_int32 = node->InputDefs()[0]->Type()->find("int32") != std::string::npos; - const bool B_is_int32 = node->InputDefs()[1]->Type()->find("int32") != std::string::npos; - const bool A_is_int64 = node->InputDefs()[0]->Type()->find("int64") != std::string::npos; - const bool B_is_int64 = node->InputDefs()[1]->Type()->find("int64") != std::string::npos; - if ((A_is_int32 && B_is_int32) || (A_is_int64 && B_is_int64)) - return true; - return false; - }}; - op_list_.insert({"Pow", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet&) { - // Max op with one 
input is not supporting for GPU_FP16 - if (device_id_.find("GPU") != std::string::npos) { - if (device_precision_ == "FP16") { - if (node->InputDefs().size() == 1) { - return true; - } - } - } - return false; - }}; - op_list_.insert({"Max", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet&) { - // Min op with one input is not supporting for GPU_FP16 - if (device_id_.find("GPU") != std::string::npos) { - if (device_precision_ == "FP16") { - if (node->InputDefs().size() == 1) { - return true; - } - } - } - return false; - }}; - op_list_.insert({"Min", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet&) { - // Sum op with one input is not supporting for GPU_FP16 - if (device_id_.find("GPU") != std::string::npos) { - if (device_precision_ == "FP16") { - if (node->InputDefs().size() == 1) { - return true; - } - } - } - return false; - }}; - op_list_.insert({"Sum", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet& initializers) { - if (device_id_.find("GPU") != std::string::npos) { - auto slope = node->InputDefs()[1]; - // PRelu slope has to be an initializer or needs to come from a constant node - if (initializers.count(slope->Name())) { - return false; - } else { - for (auto input_node = node->InputNodesBegin(); - input_node != node->InputNodesEnd(); ++input_node) { - if (GetInputCount( - this->graph_viewer_.GetNode((*input_node).Index()), initializers) == 0) - return false; - } - } - } - return true; - }}; - op_list_.insert({"PRelu", obj}); + op_list_.insert({"ReduceMax", obj}); } { - UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0}, + UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1}, [this](const Node* node, const InitializedTensorSet&) { const auto& input_arg = node->InputDefs()[1]; auto shape = input_arg->Shape(); @@ -608,105 +393,7 @@ void DataOps::populate_op_mode_supported() { op_list_.insert({"Reshape", obj}); } { - UnsupportedOpMode obj = {{V_2022_1}, - [this](const Node* node, const InitializedTensorSet&) { - auto& attributes = node->GetAttributes(); - if (attributes.count("mode") == 1 && attributes.at("mode").s() == "linear") { - if (node->InputDefs().size() == 4) { - return true; - } - } - return false; - }}; - op_list_.insert({"Resize", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet&) { - if (device_id_.find("GPU") != std::string::npos) { - // INT32 dataype is not supported as input - for (size_t i = 0; i < node->InputDefs().size(); i++) { - if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == - ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32) - return true; - } - } - return false; - }}; - op_list_.insert({"ReduceLogSumExp", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet&) { - if (device_id_.find("GPU") != std::string::npos) { - auto output_data_type = - node->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type(); - // If the output of ScatterND op is BOOL, it is rejected for GPU. 
- if (output_data_type == - ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL) - return true; - } - return false; - }}; - op_list_.insert({"ScatterND", obj}); - op_list_.insert({"ScatterElements", obj}); - op_list_.insert({"Scatter", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet&) { - // If the Input of Shrink op is UINT8, it is rejected (Due to output mismatch) - for (size_t i = 0; i < node->InputDefs().size(); i++) { - if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == - ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8) - return true; - } - return false; - }}; - op_list_.insert({"Shrink", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet& initializers) { - // start, end, axes need to be a initializer - bool cond_for_slice = false; - const auto& data_arg = node->InputDefs()[0]; - auto graph_inputs = graph_viewer_.GetInputs(); - - auto it = find(graph_inputs.begin(), graph_inputs.end(), data_arg); - if (it != graph_inputs.end()) { - if (node->InputDefs().size() > 1) { - const auto& start_arg = node->InputDefs()[1]; - const auto& end_arg = node->InputDefs()[2]; - cond_for_slice |= initializers.find(start_arg->Name()) == initializers.end(); - cond_for_slice |= initializers.find(end_arg->Name()) == initializers.end(); - } - if (node->InputDefs().size() > 3) { - const auto& axes_arg = node->InputDefs()[3]; - cond_for_slice |= initializers.find(axes_arg->Name()) == initializers.end(); - } - } - - return cond_for_slice; - }}; - op_list_.insert({"Slice", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2}, - [this](const Node* node, const InitializedTensorSet&) { - if (device_id_.find("GPU") != std::string::npos) { - if (node->InputDefs().size() > 1 && - (node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type() == - ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT)) { - return true; - } - } - return false; - }}; - op_list_.insert({"Squeeze", obj}); - } - { - UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0}, + UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1}, [this](const Node* node, const InitializedTensorSet&) { // If the operator is unsqueeze // If axes is an input, then we cannot produce a static graph. 
@@ -721,7 +408,7 @@ void DataOps::populate_op_mode_supported() { op_list_.insert({"Unsqueeze", obj}); } { - UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0}, + UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1}, [this](const Node* node, const InitializedTensorSet&) { // check for attributes auto& upsample_attr = node->GetAttributes(); diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h index 0990904908111..89b738de1d980 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h @@ -27,7 +27,8 @@ enum versionNum { V_2023_1, V_2023_2, V_2023_3, - V_2024_0 + V_2024_0, + V_2024_1 }; using VersionNum = enum versionNum; @@ -62,8 +63,8 @@ class DataOps { std::set supported_types_initializer_; protected: - virtual void populate_op_mode_supported(); - virtual void populate_types_supported(); + void populate_op_mode_supported(); + void populate_types_supported(); bool op_is_supported(std::string name, std::vector& list); bool dimension_unsupported(const Node* node); bool unsupported_op_mode(const Node* node); @@ -71,8 +72,9 @@ class DataOps { bool node_is_supported(const NodeIndex node_idx); public: - DataOps(const GraphViewer& graph_viewer_param, VersionNum ver, const std::string dev_id, const std::string device_precision) - : graph_viewer_(graph_viewer_param), version_id_(ver), device_id_(dev_id), device_precision_(device_precision) { + DataOps(const GraphViewer& graph_viewer_param, VersionNum ver, + const std::string dev_id) + : graph_viewer_(graph_viewer_param), version_id_(ver), device_id_(dev_id) { populate_op_mode_supported(); populate_types_supported(); } diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index fda41161ac40a..507c094422509 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1669,9 +1669,6 @@ ProviderOptions OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(const O ov_options_converted_map["enable_npu_fast_compile"] = "true"; } - if (legacy_ov_options->device_id != nullptr) - ov_options_converted_map["device_id"] = legacy_ov_options->device_id; - if (legacy_ov_options->num_of_threads != '\0') ov_options_converted_map["num_of_threads"] = std::to_string(legacy_ov_options->num_of_threads); @@ -1694,6 +1691,8 @@ ProviderOptions OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(const O // Add new provider option below ov_options_converted_map["num_streams"] = "1"; + ov_options_converted_map["export_ep_ctx_blob"] = "false"; + ov_options_converted_map["model_priority"] = "DEFAULT"; return ov_options_converted_map; } @@ -1703,7 +1702,6 @@ std::shared_ptr OpenVINOProviderFactoryCreator::Creat } std::shared_ptr OpenVINOProviderFactoryCreator::Create(const ProviderOptions* provider_options_map) { - // std::cout << provider_options_map.at("num_streams") << std::endl; return s_library_openvino.Get().CreateExecutionProviderFactory(provider_options_map); } diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index f8668be50a962..236d2cfeb2b33 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -928,6 +928,9 @@ std::unique_ptr CreateExecutionProviderInstance( if (option.first == "device_type") { OV_provider_options_map[option.first] = 
option.second; continue; + } else if (option.first == "precision") { + OV_provider_options_map[option.first] = option.second; + continue; } else if (option.first == "enable_npu_fast_compile") { if (!(option.second == "True" || option.second == "true" || option.second == "False" || option.second == "false")) { @@ -960,10 +963,10 @@ std::unique_ptr CreateExecutionProviderInstance( value = "true"; } OV_provider_options_map["disable_dynamic_shapes"] = value; - } else if (option.first == "device_id") { + } else if (option.first == "num_of_threads") { OV_provider_options_map[option.first] = option.second; continue; - } else if (option.first == "num_of_threads") { + } else if (option.first == "model_priority") { OV_provider_options_map[option.first] = option.second; continue; } else if (option.first == "num_streams") { @@ -975,6 +978,9 @@ std::unique_ptr CreateExecutionProviderInstance( } else if (option.first == "context") { OV_provider_options_map[option.first] = option.second; continue; + } else if (option.first == "export_ep_ctx_blob") { + OV_provider_options_map[option.first] = option.second; + continue; } else { ORT_THROW("Invalid OpenVINO EP option: ", option.first); } diff --git a/onnxruntime/python/onnxruntime_pybind_state_common.h b/onnxruntime/python/onnxruntime_pybind_state_common.h index 22314610dbee9..dc9394a83a4ea 100644 --- a/onnxruntime/python/onnxruntime_pybind_state_common.h +++ b/onnxruntime/python/onnxruntime_pybind_state_common.h @@ -48,17 +48,11 @@ struct OrtStatus { #endif #ifdef USE_OPENVINO -#if OPENVINO_CONFIG_CPU_FP32 -#define BACKEND_OPENVINO "-OPENVINO_CPU_FP32" +#if OPENVINO_CONFIG_CPU +#define BACKEND_OPENVINO "-OPENVINO_CPU" -#elif OPENVINO_CONFIG_CPU_FP16 -#define BACKEND_OPENVINO "-OPENVINO_CPU_FP16" - -#elif OPENVINO_CONFIG_GPU_FP32 -#define BACKEND_OPENVINO "-OPENVINO_GPU_FP32" - -#elif OPENVINO_CONFIG_GPU_FP16 -#define BACKEND_OPENVINO "-OPENVINO_GPU_FP16" +#elif OPENVINO_CONFIG_GPU +#define BACKEND_OPENVINO "-OPENVINO_GPU" #elif OPENVINO_CONFIG_NPU #define BACKEND_OPENVINO "-OPENVINO_NPU" diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index b05f58a4e75b2..62291762f61b8 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -78,7 +78,7 @@ namespace perftest { "\t [OpenVINO only] [num_of_threads]: Overrides the accelerator hardware type and precision with these values at runtime.\n" "\t [OpenVINO only] [cache_dir]: Explicitly specify the path to dump and load the blobs(Model caching) or cl_cache (Kernel Caching) files feature. If blob files are already present, it will be directly loaded.\n" "\t [OpenVINO only] [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU device(Reduces the CPU Utilization while using GPU) \n" - "\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU_FP32 enable_npu_fast_compile|true num_of_threads|5 enable_opencl_throttling|true cache_dir|\"\"\"\n" + "\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU enable_npu_fast_compile|true num_of_threads|5 enable_opencl_throttling|true cache_dir|\"\"\"\n" "\n" "\t [QNN only] [backend_path]: QNN backend path. 
e.g '/folderpath/libQnnHtp.so', '/folderpath/libQnnCpu.so'.\n" "\t [QNN only] [profiling_level]: QNN profiling level, options: 'basic', 'detailed', default 'off'.\n" diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index f93efba81106b..4067f50ebc1df 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -245,11 +245,15 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device auto value = token.substr(pos + 1); if (key == "device_type") { - std::set ov_supported_device_types = {"CPU_FP32", "CPU_FP16", "GPU_FP32", - "GPU.0_FP32", "GPU.1_FP32", "GPU_FP16", - "GPU.0_FP16", "GPU.1_FP16", "NPU"}; + std::set ov_supported_device_types = {"CPU", "GPU", + "GPU.0", "GPU.1", "NPU"}; + std::set deprecated_device_types = {"CPU_FP32", "GPU_FP32", + "GPU.0_FP32", "GPU.1_FP32", "GPU_FP16", + "GPU.0_FP16", "GPU.1_FP16"}; if (ov_supported_device_types.find(value) != ov_supported_device_types.end()) { ov_options[key] = value; + } else if (deprecated_device_types.find(value) != deprecated_device_types.end()) { + ov_options[key] = value; } else if (value.find("HETERO:") == 0) { ov_options[key] = value; } else if (value.find("MULTI:") == 0) { @@ -258,13 +262,43 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device ov_options[key] = value; } else { ORT_THROW( - "[ERROR] [OpenVINO] You have selected a wrong configuration value for the key 'device_type'. " - "Select from 'CPU_FP32', 'CPU_FP16', 'GPU_FP32', 'GPU.0_FP32', 'GPU.1_FP32', 'GPU_FP16', " - "'GPU.0_FP16', 'GPU.1_FP16', 'NPU' or from" + "[ERROR] [OpenVINO] You have selected a wrong configuration value for the key 'device_type'. " + "Select from 'CPU', 'GPU', 'GPU.0', 'GPU.1', 'NPU' or from" " HETERO/MULTI/AUTO options available. \n"); } } else if (key == "device_id") { - ov_options[key] = value; + if (value == "CPU" || value == "GPU" || value == "NPU") { + ov_options[key] = value; + } else { + ORT_THROW("[ERROR] [OpenVINO] Unsupported device_id is selected. Select from available options."); + } } else if (key == "precision") { + auto device_type = ov_options["device_type"]; + if (device_type == "CPU") { + if (value == "" || value == "ACCURACY" || value == "FP32") { + ov_options[key] = "FP32"; + continue; + } else { + ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. CPU only supports FP32. \n"); + } + } else if (device_type == "NPU") { + if (value == "" || value == "ACCURACY" || value == "FP16") { + ov_options[key] = "FP16"; + continue; + } else { + ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. NPU only supports FP16. \n"); + } + } else if (device_type == "GPU") { + if (value == "") { + ov_options[key] = "FP16"; + continue; + } else if (value == "ACCURACY" || value == "FP16" || value == "FP32") { + ov_options[key] = value; + continue; + } else { + ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. GPU only supports FP32 / FP16. 
\n"); + } + } } else if (key == "enable_npu_fast_compile") { if (value == "true" || value == "True" || value == "false" || value == "False") { @@ -294,6 +328,8 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device } else { ov_options[key] = value; } + } else if (key == "model_priority") { + ov_options[key] = value; } else if (key == "cache_dir") { ov_options[key] = value; } else if (key == "context") { @@ -304,6 +340,15 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device } else { ov_options[key] = value; } + } else if (key == "export_ep_ctx_blob") { + if (value == "true" || value == "True" || + value == "false" || value == "False") { + ov_options[key] = value; + } else { + ORT_THROW( + "[ERROR] [OpenVINO] The value for the key 'export_ep_ctx_blob' " + "should be a boolean i.e. true or false. Default value is false.\n"); + } } else { ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_npu_fast_compile', 'num_of_threads', 'cache_dir', 'num_streams', 'enable_opencl_throttling', 'disable_dynamic_shapes'] \n"); } diff --git a/onnxruntime/test/providers/cpu/activation/activation_op_test.h b/onnxruntime/test/providers/cpu/activation/activation_op_test.h index 9a74d763a13e3..409409f56c51c 100644 --- a/onnxruntime/test/providers/cpu/activation/activation_op_test.h +++ b/onnxruntime/test/providers/cpu/activation/activation_op_test.h @@ -42,7 +42,7 @@ inline void TestActivationOp(const char* szOp, const std::vector> } // Disabled because of accuracy issues for GPU -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) int leaky = strcmp(szOp, "LeakyRelu"); if (leaky == 0) { excluded_providers.insert(kOpenVINOExecutionProvider); diff --git a/onnxruntime/test/providers/cpu/math/clip_test.cc b/onnxruntime/test/providers/cpu/math/clip_test.cc index b5d5f84df950a..6f81bbbe31d54 100644 --- a/onnxruntime/test/providers/cpu/math/clip_test.cc +++ b/onnxruntime/test/providers/cpu/math/clip_test.cc @@ -23,7 +23,7 @@ TEST(MathOpTest, Clip_6) { {10.0f, 4.4f, 10.0f, -1.3f, 3.5f, 10.0f, -5.4f, 9.3f, 10.0f}); -#if defined(OPENVINO_CONFIG_CPU_FP32) || defined(OPENVINO_CONFIG_CPU_FP16) +#if defined(OPENVINO_CONFIG_CPU) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); #else test.Run(); diff --git a/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc b/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc index c02486a2ec26f..0d12cb94799c4 100644 --- a/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc +++ b/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc @@ -156,7 +156,7 @@ TEST(MathOpTest, Add_float) { test.AddInput("B", dims, rhs_values); test.AddOutput("C", dims, out_values); -#if defined(OPENVINO_CONFIG_GPU_GP16) +#if defined(OPENVINO_CONFIG_GPU) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); // OpenVINO: Disabled due to accuracy mismatch for FP16 #else @@ -219,7 +219,7 @@ TEST(MathOpTest, Add_Broadcast_MultidirectionalAB) { test.AddInput("A", {3, 1}, lhs_values); test.AddInput("B", {3}, rhs_values); test.AddOutput("C", {3, 3}, out_values); -#if defined(OPENVINO_CONFIG_GPU_FP32) || defined(OPENVINO_CONFIG_GPU_FP16) +#if defined(OPENVINO_CONFIG_GPU) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); // OpenVINO: 
disabled temporarily due to accurarcy issues @@ -245,7 +245,7 @@ TEST(MathOpTest, Add_Broadcast_MultidirectionalBA) { test.AddInput("A", {3}, lhs_values); test.AddInput("B", {3, 1}, rhs_values); test.AddOutput("C", {3, 3}, out_values); -#if defined(OPENVINO_CONFIG_GPU_FP32) || defined(OPENVINO_CONFIG_GPU_FP16) +#if defined(OPENVINO_CONFIG_GPU) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); // OpenVINO: disabled temporarily due to accurarcy issues @@ -423,8 +423,8 @@ TEST(MathOpTest, Add_Broadcast_2x1x1_3x4) { std::unordered_set excluded_providers; excluded_providers.insert(kTensorrtExecutionProvider); -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) - // OpenVINO GPU: Disabled temporarily due to accuarcy issues +#if defined(OPENVINO_CONFIG_GPU) + // OpenVINO GPU: Disabled temporarily due to accuracy issues // OpenVINO VPU: Disabled due to software limitation excluded_providers.insert(kOpenVINOExecutionProvider); #endif @@ -726,7 +726,7 @@ TEST(MathOpTest, Ceil) { test.AddOutput("Y", dims, {-1.0f, 1.0f, 0.0f, 11.0f}); -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) // OpenVINO: Disabled due to software limitation for GPU and VPU Plugins. // This test runs fine on CPU Plugin test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); @@ -744,7 +744,7 @@ TEST(MathOpTest, Ceil_double) { test.AddOutput("Y", dims, {-1.0, 1.0, 0.0, 11.0}); -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) // OpenVINO: Disabled due to software limitation for GPU and VPU Plugins. // This test runs fine on CPU Plugin test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); @@ -1195,7 +1195,7 @@ TEST(MathOpTest, Sum_6) { -6.0f, 6.6f, 28.0f, -1.0f, 0.06f, 0.25f}); -#if defined(OPENVINO_CONFIG_GPU_FP16) +#if defined(OPENVINO_CONFIG_GPU) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); // OpenVINO EP: Disabled due to accuracy mismatch for FP16 #else test.Run(); @@ -1222,7 +1222,7 @@ TEST(MathOpTest, Sum_6_double) { -6.0, 6.6, 28.0, -1.0, 0.06, 0.25}); -#if defined(OPENVINO_CONFIG_GPU_FP16) +#if defined(OPENVINO_CONFIG_GPU) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); // OpenVINO EP: Disabled due to accuracy mismatch for FP16 #else test.Run(); @@ -1246,7 +1246,7 @@ TEST(MathOpTest, Sum_8_Test1) { 311.0f, 312.0f, 313.0f, 321.0f, 322.0f, 323.0f, 331.0f, 332.0f, 333.0f}); -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) // OpenVINO: Disabled due to software limitation for GPU and VPU Plugins. // This test runs fine on CPU Plugin test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); @@ -1272,7 +1272,7 @@ TEST(MathOpTest, Sum_8_Test1_double) { 311.0, 312.0, 313.0, 321.0, 322.0, 323.0, 331.0, 332.0, 333.0}); -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) // OpenVINO: Disabled due to software limitation for GPU and VPU Plugins. 
// This test runs fine on CPU Plugin test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); @@ -1306,7 +1306,7 @@ TEST(MathOpTest, Sum_8_Test2) { 3.3f, 4.4f, -94.7f, 59.6f, 64.01f, -8.0f}); -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) // OpenVINO: Disabled temporarily due to accuracy issues test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); // TensorRT: Input batch size is inconsistent #else @@ -1340,7 +1340,7 @@ TEST(MathOpTest, Sum_8_Test2_double) { 3.3, 4.4, -94.7, 59.6, 64.01, -8.0}); -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) // OpenVINO: Disabled temporarily due to accuracy issues test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); // TensorRT: Input batch size is inconsistent #else diff --git a/onnxruntime/test/providers/cpu/math/gemm_test.cc b/onnxruntime/test/providers/cpu/math/gemm_test.cc index 1a542fb67418e..7ec84d87b2a8b 100644 --- a/onnxruntime/test/providers/cpu/math/gemm_test.cc +++ b/onnxruntime/test/providers/cpu/math/gemm_test.cc @@ -366,7 +366,7 @@ TYPED_TEST(GemmOpTypedTests, TestGemmBroadcast) { static_cast(-9.0f), static_cast(-8.0f), static_cast(-7.0f)}); std::unordered_set excluded_providers; -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) excluded_providers.insert(kOpenVINOExecutionProvider); // OpenVINO: Temporarily disabled due to accuracy issues #endif @@ -405,7 +405,7 @@ TYPED_TEST(GemmOpTypedTests, TestGemmTrans) { test.AddOutput("Y", {2, 3}, {static_cast(11.0f), static_cast(11.0f), static_cast(11.0f), static_cast(-9.0f), static_cast(-9.0f), static_cast(-9.0f)}); -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) test.ConfigExcludeEps({kOpenVINOExecutionProvider}); // OpenVINO: Temporarily disabled due to accuracy issues #endif test.Config(run_with_tunable_op) @@ -431,7 +431,7 @@ TYPED_TEST(GemmOpTypedTests, TestGemmTransB) { test.AddOutput("Y", {2, 3}, {static_cast(11.0f), static_cast(11.0f), static_cast(11.0f), static_cast(-9.0f), static_cast(-9.0f), static_cast(-9.0f)}); -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) test.ConfigExcludeEps({kOpenVINOExecutionProvider}); // OpenVINO: Temporarily disabled due to accuracy issues #endif test.Config(run_with_tunable_op) @@ -461,7 +461,7 @@ TYPED_TEST(GemmOpTypedTests, TestGemmTransB_1) { test.AddOutput("Y", {2, 3}, {static_cast(11.0f), static_cast(11.0f), static_cast(11.0f), static_cast(-9.0f), static_cast(-9.0f), static_cast(-9.0f)}); -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) test.ConfigExcludeEps({kOpenVINOExecutionProvider}); // OpenVINO: Temporarily disabled due to accuracy issues #endif test.Config(run_with_tunable_op) @@ -491,7 +491,7 @@ TYPED_TEST(GemmOpTypedTests, TestGemmAlpha) { // test.AddOutput("Y", {2, 3}, // {5.0f, 5.0f, 5.0f, // -5.0f, -5.0f, -5.0f}); -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) test.ConfigExcludeEps({kOpenVINOExecutionProvider}); // OpenVINO: Temporarily disabled due to accuracy issues #else test.ConfigExcludeEps({kTensorrtExecutionProvider}); // TensorRT: Seg fault 
in parser @@ -516,7 +516,7 @@ TYPED_TEST(GemmOpTypedTests, TestGemmBeta) { test.AddOutput("Y", {2, 3}, {static_cast(12.0f), static_cast(12.0f), static_cast(12.0f), static_cast(-8.0f), static_cast(-8.0f), static_cast(-8.0f)}); -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) test.ConfigExcludeEps({kOpenVINOExecutionProvider}); // OpenVINO: Temporarily disabled due to accuracy issues #else test.ConfigExcludeEps({kTensorrtExecutionProvider}); // TensorRT: Seg fault in parser @@ -564,7 +564,7 @@ TYPED_TEST(GemmOpTypedTests, TestGemmAlphaBeta) { test.AddOutput("Y", {2, 3}, {static_cast(7.0f), static_cast(7.0f), static_cast(7.0f), static_cast(-3.0f), static_cast(-3.0f), static_cast(-3.0f)}); -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) test.ConfigExcludeEps({kOpenVINOExecutionProvider}); // OpenVINO: Temporarily disabled due to accuracy issues #else test.ConfigExcludeEps({kTensorrtExecutionProvider}); // TensorRT: Seg fault in parser diff --git a/onnxruntime/test/providers/cpu/nn/batch_norm_op_test.cc b/onnxruntime/test/providers/cpu/nn/batch_norm_op_test.cc index d91a1de3faa6e..b0d97410ac9b3 100644 --- a/onnxruntime/test/providers/cpu/nn/batch_norm_op_test.cc +++ b/onnxruntime/test/providers/cpu/nn/batch_norm_op_test.cc @@ -50,7 +50,7 @@ void TestBatchNorm(const unordered_map>& input_data_map, } // OpenVINO: Disabled due to software limitations -#if defined(OPENVINO_CONFIG_GPU_FP32) || defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_CPU_FP32) || defined(OPENVINO_CONFIG_CPU_FP16) +#if defined(OPENVINO_CONFIG_GPU) || defined(OPENVINO_CONFIG_CPU) excluded_eps.insert(kOpenVINOExecutionProvider); #endif test.Run(expect_result, err_str, excluded_eps); diff --git a/onnxruntime/test/providers/cpu/nn/pool_op_test.cc b/onnxruntime/test/providers/cpu/nn/pool_op_test.cc index c8cf183291518..885fb11c6e999 100644 --- a/onnxruntime/test/providers/cpu/nn/pool_op_test.cc +++ b/onnxruntime/test/providers/cpu/nn/pool_op_test.cc @@ -346,7 +346,7 @@ TEST(PoolTest, MaxPool2D_uint8) { test.AddInput("Input", {1, 1, 5, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25}); test.AddOutput("Output", output_shape, output); -#if defined(OPENVINO_CONFIG_GPU_FP32) || defined(OPENVINO_CONFIG_GPU_FP16) +#if defined(OPENVINO_CONFIG_GPU) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); #else test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}); diff --git a/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc b/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc index 2902995df1e71..98a65b8efffd2 100644 --- a/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc +++ b/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc @@ -43,7 +43,7 @@ void TestReduceOp(const std::string& op, test.AddAttribute("keepdims", keepdims); test.AddInput("data", input_dims, data); test.AddOutput("reduced", expected_dims, expected_data); -#if defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kOpenVINOExecutionProvider, kTensorrtExecutionProvider}); // TensorRT,OpenVINO: result differs #else test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kTensorrtExecutionProvider}); // TensorRT: result differs @@ -1356,7 +1356,7 @@ TEST(ReductionOpTest, ReduceMax_int32) { 11, 12}); 
test.AddOutput("reduced", {3, 1, 1}, {4, 8, 12}); -#if defined(OPENVINO_CONFIG_GPU_FP32) || defined(OPENVINO_CONFIG_GPU_FP16) +#if defined(OPENVINO_CONFIG_GPU) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); // OpenVINO: Disabled temporarily #else test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT: axis must be 0 @@ -1377,7 +1377,7 @@ TEST(ReductionOpTest, ReduceMax_int64) { 9, 10, 11, 12}); test.AddOutput("reduced", {3, 1, 1}, {4, 8, 12}); -#if defined(OPENVINO_CONFIG_GPU_FP32) || defined(OPENVINO_CONFIG_GPU_FP16) +#if defined(OPENVINO_CONFIG_GPU) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); // OpenVINO: Disabled temporarily #else test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT: axis must be 0 diff --git a/onnxruntime/test/providers/cpu/tensor/scatter_op_test.cc b/onnxruntime/test/providers/cpu/tensor/scatter_op_test.cc index b1dfec7951338..6b587be7d74eb 100644 --- a/onnxruntime/test/providers/cpu/tensor/scatter_op_test.cc +++ b/onnxruntime/test/providers/cpu/tensor/scatter_op_test.cc @@ -289,7 +289,7 @@ static void scatter_bool_with_axis_tests(const char* op_name, int op_version) { test.AddInput("indices", {1, 2}, {1, 3}); test.AddInput("updates", {1, 2}, {true, false}); test.AddOutput("y", {1, 5}, {false, true, false, false, false}); -#if defined(OPENVINO_CONFIG_GPU_FP32) || defined(OPENVINO_CONFIG_GPU_FP16) +#if defined(OPENVINO_CONFIG_GPU) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaNHWCExecutionProvider, kOpenVINOExecutionProvider}); // OpenVINO: Disabled due to failure for GPU #else diff --git a/onnxruntime/test/providers/cpu/tensor/where_op_test.cc b/onnxruntime/test/providers/cpu/tensor/where_op_test.cc index 7308041194bf5..6237521b34dfd 100644 --- a/onnxruntime/test/providers/cpu/tensor/where_op_test.cc +++ b/onnxruntime/test/providers/cpu/tensor/where_op_test.cc @@ -62,7 +62,7 @@ void WhereBroadcastTest(const T& x_value, const T& y_value) { } test.AddOutput("output", {3, 3, 3}, result); -#if defined(OPENVINO_CONFIG_GPU_FP32) || defined(OPENVINO_CONFIG_GPU_FP16) +#if defined(OPENVINO_CONFIG_GPU) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); // OpenVINO: Disabled due to failure for GPU #else @@ -86,7 +86,7 @@ void WhereBroadcastTest(const T& x_value, const T& y_value) { } test.AddOutput("output", {3, 3, 3}, result); -#if defined(OPENVINO_CONFIG_GPU_FP32) || defined(OPENVINO_CONFIG_GPU_FP16) +#if defined(OPENVINO_CONFIG_GPU) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); // OpenVINO: Disabled due to failure for GPU #else diff --git a/onnxruntime/test/python/onnx_backend_test_series.py b/onnxruntime/test/python/onnx_backend_test_series.py index b8897c98c2a0a..395315b2a2b0c 100644 --- a/onnxruntime/test/python/onnx_backend_test_series.py +++ b/onnxruntime/test/python/onnx_backend_test_series.py @@ -134,13 +134,11 @@ def create_backend_test(test_name=None): if backend.supports_device("NNAPI"): current_failing_tests += apply_filters(filters, "current_failing_tests_NNAPI") - if backend.supports_device("OPENVINO_GPU_FP32") or backend.supports_device("OPENVINO_GPU_FP16"): + if backend.supports_device("OPENVINO_GPU"): current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_GPU") - if backend.supports_device("OPENVINO_CPU_FP32"): + if 
backend.supports_device("OPENVINO_CPU"): current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_CPU_FP32") - - if backend.supports_device("OPENVINO_CPU_FP16"): current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_CPU_FP16") if backend.supports_device("OPENVINO_NPU"): diff --git a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc index 0d141d634e051..005128bc05d4a 100644 --- a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc +++ b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc @@ -553,7 +553,7 @@ "test_reduce_max_bool_inputs_cpu", "test_gelu_default_1_cpu", // Disabled due to accuracy mismatch "test_gelu_default_2_cpu" - + ], "current_failing_tests_OPENVINO_NPU": [ "^test_prelu_broadcast", diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 33dc403777de6..0f34f2c01cc74 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -73,13 +73,11 @@ def _str_to_bool(s): def _openvino_verify_device_type(device_read): - choices = ["CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16", "NPU"] + choices = ["CPU", "GPU", "NPU"] choices1 = [ - "CPU_FP32_NO_PARTITION", - "CPU_FP16_NO_PARTITION", - "GPU_FP32_NO_PARTITION", - "GPU_FP16_NO_PARTITION", + "CPU_NO_PARTITION", + "GPU_NO_PARTITION", "NPU_NO_PARTITION", ] status_hetero = True @@ -534,7 +532,7 @@ def convert_arg_line_to_args(self, arg_line): parser.add_argument( "--use_openvino", nargs="?", - const="CPU_FP32", + const="CPU", type=_openvino_verify_device_type, help="Build with OpenVINO for specific hardware.", ) @@ -1223,19 +1221,11 @@ def generate_build_tree( if args.use_openvino: cmake_args += [ "-Donnxruntime_USE_OPENVINO=ON", - "-Donnxruntime_USE_OPENVINO_GPU_FP32=" + ("ON" if args.use_openvino == "GPU_FP32" else "OFF"), - "-Donnxruntime_USE_OPENVINO_GPU_FP16=" + ("ON" if args.use_openvino == "GPU_FP16" else "OFF"), - "-Donnxruntime_USE_OPENVINO_CPU_FP32=" + ("ON" if args.use_openvino == "CPU_FP32" else "OFF"), - "-Donnxruntime_USE_OPENVINO_CPU_FP16=" + ("ON" if args.use_openvino == "CPU_FP16" else "OFF"), + "-Donnxruntime_USE_OPENVINO_GPU=" + ("ON" if args.use_openvino == "GPU" else "OFF"), + "-Donnxruntime_USE_OPENVINO_CPU=" + ("ON" if args.use_openvino == "CPU" else "OFF"), "-Donnxruntime_USE_OPENVINO_NPU=" + ("ON" if args.use_openvino == "NPU" else "OFF"), - "-Donnxruntime_USE_OPENVINO_GPU_FP32_NP=" - + ("ON" if args.use_openvino == "GPU_FP32_NO_PARTITION" else "OFF"), - "-Donnxruntime_USE_OPENVINO_GPU_FP16_NP=" - + ("ON" if args.use_openvino == "GPU_FP16_NO_PARTITION" else "OFF"), - "-Donnxruntime_USE_OPENVINO_CPU_FP32_NP=" - + ("ON" if args.use_openvino == "CPU_FP32_NO_PARTITION" else "OFF"), - "-Donnxruntime_USE_OPENVINO_CPU_FP16_NP=" - + ("ON" if args.use_openvino == "CPU_FP16_NO_PARTITION" else "OFF"), + "-Donnxruntime_USE_OPENVINO_GPU_NP=" + ("ON" if args.use_openvino == "GPU_NO_PARTITION" else "OFF"), + "-Donnxruntime_USE_OPENVINO_CPU_NP=" + ("ON" if args.use_openvino == "CPU_NO_PARTITION" else "OFF"), "-Donnxruntime_USE_OPENVINO_NPU_NP=" + ("ON" if args.use_openvino == "NPU_NO_PARTITION" else "OFF"), "-Donnxruntime_USE_OPENVINO_HETERO=" + ("ON" if args.use_openvino.startswith("HETERO") else "OFF"), "-Donnxruntime_USE_OPENVINO_DEVICE=" + (args.use_openvino), @@ -2636,7 +2626,7 @@ def main(): raise BuildError("Using --get-api-doc requires a single build config") # Disabling unit tests for GPU on nuget creation - if args.use_openvino and 
args.use_openvino != "CPU_FP32" and args.build_nuget: + if args.use_openvino and args.use_openvino != "CPU" and args.build_nuget: args.test = False # GDK builds don't support testing diff --git a/tools/ci_build/github/azure-pipelines/linux-openvino-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-openvino-ci-pipeline.yml index 03e0274fc198a..45d763384ee2c 100644 --- a/tools/ci_build/github/azure-pipelines/linux-openvino-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-openvino-ci-pipeline.yml @@ -32,5 +32,5 @@ jobs: parameters: AgentPool : 'Linux-CPU-2019' JobName: 'Linux_CI_Dev' - RunDockerBuildArgs: '-o ubuntu20.04 -d openvino -v 2024.0.0 -x "--use_openvino CPU_FP32 --build_wheel"' + RunDockerBuildArgs: '-o ubuntu20.04 -d openvino -v 2024.0.0 -x "--use_openvino CPU --build_wheel"' TimeoutInMinutes: 120 diff --git a/tools/ci_build/github/azure-pipelines/py-package-build-pipeline.yml b/tools/ci_build/github/azure-pipelines/py-package-build-pipeline.yml index 4c80aedeb1f18..afa0ad6f4cbc7 100644 --- a/tools/ci_build/github/azure-pipelines/py-package-build-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/py-package-build-pipeline.yml @@ -27,7 +27,7 @@ parameters: - name: cpu_build_py_parameters displayName: 'Extra parameters to pass to build.py for CPU package.' type: string - default: '--use_openvino CPU_FP32' + default: '--use_openvino CPU' - name: gpu_build_py_parameters displayName: 'Extra parameters to pass to build.py for GPU package.' diff --git a/tools/ci_build/github/azure-pipelines/templates/py-packaging-selectable-stage.yml b/tools/ci_build/github/azure-pipelines/templates/py-packaging-selectable-stage.yml index 01cab936aa529..47b0d2188aa9f 100644 --- a/tools/ci_build/github/azure-pipelines/templates/py-packaging-selectable-stage.yml +++ b/tools/ci_build/github/azure-pipelines/templates/py-packaging-selectable-stage.yml @@ -27,7 +27,7 @@ parameters: - name: cpu_build_py_parameters displayName: 'Extra parameters to pass to build.py for CPU package.' type: string - default: '--use_openvino CPU_FP32' + default: '--use_openvino CPU' - name: gpu_build_py_parameters displayName: 'Extra parameters to pass to build.py for GPU package.'