From 1f88284f96d98b735824c8a9eebee4fc85becf7d Mon Sep 17 00:00:00 2001
From: Ankit Maheshkar
Date: Thu, 12 Dec 2024 11:56:32 +0530
Subject: [PATCH] OVEP 1.21.0 Development Updates (#23080)

### Description
OVEP development changes for the ORT 1.21 release.

### Motivation and Context
- Includes critical bug fixes
- Improved performance optimizations for both memory usage & inference latency (https://github.com/intel/onnxruntime/pull/513)
- Enabled model compilation using NPUW (https://github.com/intel/onnxruntime/pull/508)
- Fixed support for EPContext embed mode 0, for lower memory utilization
- Renamed the NuGet package to `Intel.ML.OnnxRuntime.OpenVino`
- Fixed the QDQ stripping logic on NPU
---
 cmake/onnxruntime_providers_openvino.cmake    |  4 +-
 .../providers/openvino/backend_manager.cc     |  5 +-
 .../core/providers/openvino/backend_utils.cc  | 16 +++---
 .../core/providers/openvino/backend_utils.h   |  2 +-
 .../openvino/backends/basic_backend.cc        | 57 ++++++++++++++++---
 .../openvino/backends/basic_backend.h         |  1 -
 .../openvino/onnx_ctx_model_helper.cc         | 13 ++++-
 .../openvino/onnx_ctx_model_helper.h          |  6 +-
 .../openvino/openvino_execution_provider.h    |  2 +-
 .../openvino/openvino_provider_factory.cc     |  4 +-
 .../core/providers/openvino/ov_allocator.cc   |  1 -
 .../core/providers/openvino/ov_interface.cc   |  8 +--
 .../core/providers/openvino/ov_interface.h    |  2 +-
 .../openvino/ov_versions/capability.cc        |  8 +--
 .../openvino/ov_versions/data_ops.cc          | 12 ++--
 .../providers/openvino/ov_versions/data_ops.h |  3 +-
 .../qdq_transformations/qdq_stripping.cc      |  6 +-
 tools/ci_build/build.py                       |  2 +-
 .../linux-openvino-ci-pipeline.yml            |  2 +-
 .../linux/docker/Dockerfile.ubuntu_openvino   |  8 +-
 .../nuget/generate_nuspec_for_native_nuget.py | 28 ++++++++-
 21 files changed, 138 insertions(+), 52 deletions(-)

diff --git a/cmake/onnxruntime_providers_openvino.cmake b/cmake/onnxruntime_providers_openvino.cmake
index e500957f864f8..f5fae8d169ccc 100644
--- a/cmake/onnxruntime_providers_openvino.cmake
+++ b/cmake/onnxruntime_providers_openvino.cmake
@@ -13,8 +13,8 @@

   # Header paths
   find_package(OpenVINO REQUIRED COMPONENTS Runtime ONNX)
-  if(OpenVINO_VERSION VERSION_LESS 2024.3)
-    message(FATAL_ERROR "OpenVINO 2024.3 and newer are supported. Please, use latest OpenVINO release")
+  if(OpenVINO_VERSION VERSION_LESS 2024.4)
+    message(FATAL_ERROR "OpenVINO 2024.4 and newer are supported. Please, use latest OpenVINO release")
   endif()

   if(OpenVINO_VERSION VERSION_GREATER_EQUAL 2024.4)
diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc
index 0ffde116f4efc..a0bcf953938d9 100644
--- a/onnxruntime/core/providers/openvino/backend_manager.cc
+++ b/onnxruntime/core/providers/openvino/backend_manager.cc
@@ -70,7 +70,10 @@ BackendManager::BackendManager(const GlobalContext& global_context,
     i++;
   }
   subgraph_context_.subgraph_name = fused_node.Name();
-  auto model_proto = GetModelProtoFromFusedNode(fused_node, subgraph, logger);
+  std::unique_ptr<ONNX_NAMESPACE::ModelProto> model_proto;
+  if (!ep_ctx_handle_.IsValidOVEPCtxGraph()) {
+    model_proto = GetModelProtoFromFusedNode(fused_node, subgraph, logger);
+  }
   std::string device_type = openvino_ep::BackendManager::GetGlobalContext().device_type;

   if (ModelHasSymbolicInputDims(subgraph)) {
diff --git a/onnxruntime/core/providers/openvino/backend_utils.cc b/onnxruntime/core/providers/openvino/backend_utils.cc
index f772b9c3b0478..b97736f2e124d 100644
--- a/onnxruntime/core/providers/openvino/backend_utils.cc
+++ b/onnxruntime/core/providers/openvino/backend_utils.cc
@@ -39,7 +39,7 @@ struct static_cast_int64 {
   int64_t operator()(const T1& x) const { return static_cast<int64_t>(x); }
 };

-std::shared_ptr<OVNetwork>
+std::shared_ptr<const OVNetwork>
 CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context,
               std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map) {
   if (IsCILogEnabled()) {
@@ -47,13 +47,13 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
   }
   const std::string model = model_proto.SerializeAsString();
   try {
-    auto cnn_network = global_context.ie_core.ReadModel(model, global_context.onnx_model_path_name);
+    auto ov_model = global_context.ie_core.ReadModel(model, global_context.onnx_model_path_name);

     // Check for Constant Folding
-    if (!global_context.is_wholly_supported_graph) {
+    if ((global_context.device_type != "NPU") && !global_context.is_wholly_supported_graph) {
       ov::pass::ConstantFolding pass_const_obj;
-      pass_const_obj.run_on_model(cnn_network);
-      auto& results = const_cast<ov::ResultVector&>(cnn_network.get()->get_results());
+      pass_const_obj.run_on_model(ov_model);
+      auto& results = const_cast<ov::ResultVector&>(ov_model.get()->get_results());
       size_t index = results.size() - 1;

       for (auto it = results.rbegin(); it != results.rend(); ++it) {
@@ -67,12 +67,12 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
     }
 #ifndef NDEBUG
     if (IsDebugEnabled()) {
-      std::string name = cnn_network->get_friendly_name();
+      std::string name = ov_model->get_friendly_name();
       ov::pass::Serialize serializer(name + ".xml", name + ".bin");
-      serializer.run_on_model(cnn_network);
+      serializer.run_on_model(ov_model);
     }
 #endif
-    return cnn_network;
+    return ov_model;
   } catch (std::string const& msg) {
     ORT_THROW(msg);
   }
diff --git a/onnxruntime/core/providers/openvino/backend_utils.h b/onnxruntime/core/providers/openvino/backend_utils.h
index 9e65770da7d23..9d58e1ca73abb 100644
--- a/onnxruntime/core/providers/openvino/backend_utils.h
+++ b/onnxruntime/core/providers/openvino/backend_utils.h
@@ -60,7 +60,7 @@ void FillInputBlob(OVTensorPtr inputBlob, size_t batch_slice_idx,
 void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor,
                     size_t batch_slice_idx);

-std::shared_ptr<OVNetwork>
+std::shared_ptr<const OVNetwork>
 CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto,
               const GlobalContext& global_context,
               std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);
diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
index 56cceb8cf2a19..435ca83ff69d4 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -48,6 +48,16 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
   // Set the inference_num_threads property of the CPU
   SetNumThreads(device_config);

+  auto npuw_status =
+      std::any_of(device_config.begin(), device_config.end(), [&](const std::pair<std::string, ov::Any>& pair) {
+        return (pair.first.find("NPU_USE_NPUW") != std::string::npos) && (pair.second.is<std::string>()) &&
+               (pair.second.as<std::string>() == "YES");
+      });
+
+  if (npuw_status) {
+    LOGS_DEFAULT(INFO) << log_tag << "NPUW Enabled during compilation";
+  }
+
   try {
     std::string dev_prec = global_context.device_type + "_" + global_context_.precision_str;

@@ -81,7 +91,6 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
                                                         device_config,
                                                         global_context_.ep_context_embed_mode,
                                                         subgraph_context_.subgraph_name);
-      ie_cnn_network_ = exe_network_.Get().get_runtime_model();
     } else if (global_context_.export_ep_ctx_blob &&
                hw_target.find("NPU") != std::string::npos &&
                !global_context_.has_external_weights) {
@@ -106,15 +115,15 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
                                                         device_config,
                                                         subgraph_context_.subgraph_name);
       } else {  // For all other types use ov::Model Type
-        ie_cnn_network_ = CreateOVModel(*model_proto, global_context_, const_outputs_map_);
+        auto ov_model = CreateOVModel(*model_proto, global_context_, const_outputs_map_);
         exe_network_ = global_context_.ie_core.CompileModel(
-            ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
+            ov_model, hw_target, device_config, subgraph_context_.subgraph_name);
       }
 #endif
     } else {  // Full graph is not supported
-      ie_cnn_network_ = CreateOVModel(*model_proto, global_context_, const_outputs_map_);
+      auto ov_model = CreateOVModel(*model_proto, global_context_, const_outputs_map_);
       exe_network_ = global_context_.ie_core.CompileModel(
-          ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
+          ov_model, hw_target, device_config, subgraph_context_.subgraph_name);
     }
     LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
   } catch (const char* msg) {
@@ -145,8 +154,8 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
       device_config.emplace(ov::hint::inference_precision("f32"));
     }
     if (global_context_.precision_str.find("ACCURACY") != std::string::npos &&
-        global_context_.device_type == "GPU") {
-      if (global_context_.OpenVINO_Version.at(0) >= 2024 && global_context_.OpenVINO_Version.at(1) >= 1) {
+        global_context_.device_type.find("GPU") != std::string::npos) {
+      if (global_context_.OpenVINO_Version.at(0) >= 2024) {
         device_config.emplace(ov::hint::inference_precision(ov::element::undefined));
         device_config.emplace(ov::hint::execution_mode(ov::hint::ExecutionMode::ACCURACY));
       } else {
@@ -174,7 +183,7 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
       device_property = std::make_pair("NPU_COMPILER_TYPE", env_npu_compiler_type);
     }
     device_config.emplace(ov::device::properties("NPU", device_property));
-#if (OPENVINO_VERSION_MAJOR >= 2024) && (OPENVINO_VERSION_MINOR > 3)
+#if (((OPENVINO_VERSION_MAJOR == 2024) && (OPENVINO_VERSION_MINOR > 3)) || (OPENVINO_VERSION_MAJOR > 2024))
     if (global_context_.export_ep_ctx_blob) {
       global_context_.ie_core.Get().set_property("NPU", ov::intel_npu::bypass_umd_caching(true));
     }
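The `npuw_status` scan above only reports whether `NPU_USE_NPUW=YES` made it into the final `device_config`; the flag itself is supplied through the OVEP `load_config` provider option, which points at a JSON file of per-device OpenVINO properties. A hedged sketch of that flow — the JSON file name is an assumption, and `NPUW_DEVICES` stands in for whichever NPUW properties your OpenVINO build actually supports:

```python
import json
import onnxruntime as ort

# NPUW-related keys are honored only when NPU_USE_NPUW is exactly "YES"
# (see the PopulateConfigValue hunk that follows).
with open("ov_npu_config.json", "w") as f:
    json.dump({"NPU": {"NPU_USE_NPUW": "YES", "NPUW_DEVICES": "NPU"}}, f)

sess = ort.InferenceSession(
    "model.onnx",  # illustrative model path
    providers=[("OpenVINOExecutionProvider",
                {"device_type": "NPU", "load_config": "ov_npu_config.json"})],
)
```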
@@ -184,6 +193,33 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
   if (!global_context_.load_config.empty()) {
     const std::map<std::string, ov::AnyMap>& target_config = global_context_.load_config;

+    if (global_context_.device_type.find("NPU") != std::string::npos) {
+      auto npuw_config = target_config.at("NPU");
+
+      // Check if "NPU_USE_NPUW" exists and is set to "YES"
+      auto npu_use_npuw_it = npuw_config.find("NPU_USE_NPUW");
+      if (npu_use_npuw_it != npuw_config.end() &&
+          npu_use_npuw_it->second.is<std::string>() &&
+          npu_use_npuw_it->second.as<std::string>() == "YES") {
+        // Only add NPUW-related keys if NPU_USE_NPUW is "YES"
+        for (const auto& [key, value] : npuw_config) {
+          if (key.find("NPUW") != std::string::npos) {
+            if (!value.is<std::string>()) {
+              LOGS_DEFAULT(ERROR) << "Invalid value type for key: " << key;
+              continue;
+            }
+            device_config[key] = value;
+          }
+        }
+      } else {
+        // Check if there are any "NPUW" keys and log a warning
+        if (std::any_of(npuw_config.begin(), npuw_config.end(),
+                        [&](const auto& pair) { return pair.first.find("NPUW") != std::string::npos; })) {
+          LOGS_DEFAULT(WARNING) << "Skipping NPUW-related configurations as NPU_USE_NPUW is not set to 'YES'.";
+        }
+      }
+    }
+
     // Parse device types like "AUTO:CPU,GPU" and extract individual devices
     auto parse_individual_devices = [&](const std::string& device_type) -> std::vector<std::string> {
       std::vector<std::string> devices;
@@ -213,6 +249,9 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
     auto set_target_properties = [&](const std::string& device, const ov::AnyMap& config_options,
                                      const std::vector<ov::PropertyName>& supported_properties) {
       for (const auto& [key, value] : config_options) {
+        if (key.find("NPUW") != std::string::npos) {
+          continue;
+        }
         if (is_supported_and_mutable(key, supported_properties)) {
           global_context_.ie_core.Get().set_property(device, ov::AnyMap{{key, value}});
         } else {
@@ -378,7 +417,7 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
     if ((it == ort_ov_tensor_map.end()) ||
         (it != ort_ov_tensor_map.end() && (it->second.ort_ptr != tensor.GetTensorRawData()))) {
       ov_tensor_data_t ov_tensor_data;
-      auto input = graph_input_info.at(input_idx);
+      const auto& input = graph_input_info.at(input_idx);
       ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(input.get_element_type(), input.get_shape(),
                                                                const_cast<void*>(tensor.GetTensorRawData()));
diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.h b/onnxruntime/core/providers/openvino/backends/basic_backend.h
index 12502a1d83c5d..3fcf6e4384d52 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.h
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.h
@@ -58,7 +58,6 @@ class BasicBackend : public IBackend {
   GlobalContext& global_context_;
   SubGraphContext subgraph_context_;
   mutable std::mutex compute_lock_;
-  std::shared_ptr<const OVNetwork> ie_cnn_network_;
   OVExeNetwork exe_network_;
   std::map<std::string, std::shared_ptr<ov::Node>> const_outputs_map_;
   std::unique_ptr<InferRequestsQueue> inferRequestsQueue_;
diff --git a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc
index 42a2b5d30c25c..6d159db3b390d 100644
--- a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc
+++ b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc
@@ -99,7 +99,9 @@ Status EPCtxHandler::ImportBlobFromEPCtxModel(const GraphViewer& graph_viewer, b
   auto node = graph_viewer.GetNode(0);
   auto& attrs = node->GetAttributes();
   ORT_ENFORCE(attrs.count(EP_CACHE_CONTEXT) > 0);
-  model_stream_ = std::make_shared<std::istringstream>(attrs.at(EP_CACHE_CONTEXT).s());
+
+  ep_cache_context_attribute_ = &attrs.at(EP_CACHE_CONTEXT);
+
   ep_context_embed_mode = static_cast<bool>(attrs.at(EMBED_MODE).i());
   LOGS_DEFAULT(VERBOSE) << "[OpenVINO EP] Read blob from EPContext Node";
@@ -107,6 +109,15 @@ Status EPCtxHandler::ImportBlobFromEPCtxModel(const GraphViewer& graph_viewer, b
   return Status::OK();
 }

+const std::string& EPCtxHandler::GetModelBlobStream() const {
+  static std::string empty;
+  if (ep_cache_context_attribute_ != nullptr) {
+    return ep_cache_context_attribute_->s();
+  } else {
+    return empty;
+  }
+}
+
 bool EPCtxHandler::CheckForOVEPCtxNode(const GraphViewer& graph_viewer, std::string openvino_sdk_version) const {
   for (int i = 0; i < graph_viewer.MaxNodeIndex(); ++i) {
     auto node = graph_viewer.GetNode(i);
diff --git a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h
index c7ee943dff761..caab33b7db775 100644
--- a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h
+++ b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h
@@ -23,7 +23,7 @@ static const char SOURCE[] = "source";
 class EPCtxHandler {
  public:
   EPCtxHandler() = default;
-  EPCtxHandler(const EPCtxHandler&) = default;
+  EPCtxHandler(const EPCtxHandler&) = delete;
   Status ExportEPCtxModel(const GraphViewer& graph_viewer,
                           const std::string& graph_name,
                           const logging::Logger& logger,
@@ -33,11 +33,11 @@ class EPCtxHandler {
   Status ImportBlobFromEPCtxModel(const GraphViewer& graph_viewer, bool& ep_context_embed_mode);
   bool CheckForOVEPCtxNode(const GraphViewer& graph_viewer, std::string openvino_sdk_version) const;
   bool IsValidOVEPCtxGraph() const { return is_valid_ep_ctx_graph_; }
-  [[nodiscard]] const std::shared_ptr<std::istringstream> GetModelBlobStream() const { return model_stream_; }
+  const std::string& GetModelBlobStream() const;

  private:
   bool is_valid_ep_ctx_graph_{false};
-  std::shared_ptr<std::istringstream> model_stream_;
+  const onnx::AttributeProto* ep_cache_context_attribute_;
 };

 }  // namespace openvino_ep
diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.h b/onnxruntime/core/providers/openvino/openvino_execution_provider.h
index 59dbd141f4782..d5c22a4e2a9e4 100644
--- a/onnxruntime/core/providers/openvino/openvino_execution_provider.h
+++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.h
@@ -159,7 +159,7 @@ struct OpenVINOExecutionProviderInfo {
       device_type_ = std::move(dev_type);
     } else if (dev_type.find("HETERO") == 0 || dev_type.find("MULTI") == 0 || dev_type.find("AUTO") == 0) {
       std::vector<std::string> devices = parseDevices(dev_type, available_devices);
-      device_type_ = dev_type;
+      device_type_ = std::move(dev_type);
     } else {
       ORT_THROW("Invalid device string: " + dev_type);
     }
diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
index 66f9bcb7b2a5e..5855cb594a08e 100644
--- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
+++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
@@ -57,7 +57,7 @@ std::unique_ptr<IExecutionProvider> OpenVINOProviderFactory::CreateProvider() {
   std::string so_cache_path = config_options_.GetConfigOrDefault(kOrtSessionOptionEpContextFilePath, "").c_str();

   if (so_export_ep_ctx_blob && !so_cache_path.empty()) {
-    cache_dir_ = so_cache_path;
+    cache_dir_ = std::move(so_cache_path);
     auto file_path = std::filesystem::path(cache_dir_);
     // ep_context_file_path_ file extension must be .onnx
     if (file_path.extension().generic_string() == ".onnx") {
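`GetModelBlobStream()` now hands back a const reference to the string stored in the EPContext node's `ep_cache_context` attribute, so the (potentially very large) compiled blob is no longer copied into a heap-allocated `std::istringstream`; with embed mode 0 that attribute holds a file path rather than the blob bytes. A small sketch for inspecting such a model with the `onnx` Python package, assuming a single-node EPContext graph named `model_ctx.onnx`:

```python
import onnx

m = onnx.load("model_ctx.onnx")
node = m.graph.node[0]  # OVEP emits one EPContext node per fused subgraph
attrs = {a.name: a for a in node.attribute}

print(node.op_type)           # "EPContext"
print(attrs["embed_mode"].i)  # 0 -> ep_cache_context holds a blob file path
print(len(attrs["ep_cache_context"].s))  # path length, or blob size in embed mode 1
```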
@@ -248,7 +248,7 @@ struct OpenVINO_Provider : Provider {
                 LOGS_DEFAULT(WARNING) << "Unsupported JSON value type for key: " << inner_key
                                       << ". Skipping key.";
               }
             }
-            target_map[key] = inner_map;
+            target_map[key] = std::move(inner_map);
           }
         } catch (const nlohmann::json::parse_error& e) {
           // Handle syntax errors in JSON
diff --git a/onnxruntime/core/providers/openvino/ov_allocator.cc b/onnxruntime/core/providers/openvino/ov_allocator.cc
index 6700244b754d8..0e5ff8ff98efb 100644
--- a/onnxruntime/core/providers/openvino/ov_allocator.cc
+++ b/onnxruntime/core/providers/openvino/ov_allocator.cc
@@ -39,7 +39,6 @@ void* OVRTAllocator::Alloc(size_t size) {
   } catch (const ov::Exception& e) {
     ORT_THROW(std::string("Alloc failed: ") + e.what());
   }
-  return nullptr;
 }

 void OVRTAllocator::Free(void* p) {
diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc
index 7e8681d304abf..12ab7ecede031 100644
--- a/onnxruntime/core/providers/openvino/ov_interface.cc
+++ b/onnxruntime/core/providers/openvino/ov_interface.cc
@@ -109,7 +109,7 @@ OVExeNetwork OVCore::CompileModel(const std::string& onnx_model,
   }
 }

-OVExeNetwork OVCore::ImportModel(std::shared_ptr<std::istringstream> model_stream,
+OVExeNetwork OVCore::ImportModel(const std::string& model_string,
                                  std::string hw_target,
                                  const ov::AnyMap& device_config,
                                  bool embed_mode,
@@ -117,10 +117,10 @@ OVExeNetwork OVCore::ImportModel(std::shared_ptr<std::istringstream> model_strea
   try {
     ov::CompiledModel obj;
     if (embed_mode) {
-      obj = oe.import_model(*model_stream, hw_target, device_config);
+      std::istringstream model_stream(model_string);
+      obj = oe.import_model(model_stream, hw_target, device_config);
     } else {
-      std::string blob_file_path = (*model_stream).str();
-      std::ifstream modelStream(blob_file_path, std::ios_base::binary | std::ios_base::in);
+      std::ifstream modelStream(model_string, std::ios_base::binary | std::ios_base::in);
       obj = oe.import_model(modelStream, hw_target, {});
diff --git a/onnxruntime/core/providers/openvino/ov_interface.h b/onnxruntime/core/providers/openvino/ov_interface.h
index f4da4ea3e3244..c3417003f8e1f 100644
--- a/onnxruntime/core/providers/openvino/ov_interface.h
+++ b/onnxruntime/core/providers/openvino/ov_interface.h
@@ -54,7 +54,7 @@ class OVCore {
                             ov::AnyMap& device_config,
                             const std::string& name);
   // OV Interface for Import model Stream
-  OVExeNetwork ImportModel(std::shared_ptr<std::istringstream> model_stream,
+  OVExeNetwork ImportModel(const std::string& model_string,
                            std::string hw_target,
                            const ov::AnyMap& device_config,
                            bool embed_mode,
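`ImportModel` now receives the raw blob string and decides locally how to stream it: embed mode 1 wraps the bytes in a stack-allocated `std::istringstream`, while embed mode 0 treats the string as a path and opens the file directly, instead of round-tripping a path through a shared heap-allocated stream. The same dispatch in a few lines of Python (function and parameter names are illustrative):

```python
import io

def open_model_blob(payload: str, embed_mode: bool):
    """payload is the blob itself (embed mode 1) or a path to it (embed mode 0)."""
    if embed_mode:
        return io.BytesIO(payload.encode())  # in-memory stream over the attribute bytes
    return open(payload, "rb")  # embed mode 0: stream the blob file from disk
```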
diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability.cc b/onnxruntime/core/providers/openvino/ov_versions/capability.cc
index 95c7466e02f2f..3e780f74145ae 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/capability.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/capability.cc
@@ -35,14 +35,14 @@ GetCapability::GetCapability(const GraphViewer& graph_viewer_param,
     device_type_ = "CPU";
     if (enable_qdq_optimizer) npu_qdq_optimizer_enabled = true;
   }
-#if OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 3
-  data_ops_ = new DataOps(graph_viewer_, V_2024_3, device_type_, npu_qdq_optimizer_enabled);
-#elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 4
+#if OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 4
   data_ops_ = new DataOps(graph_viewer_, V_2024_4, device_type_, npu_qdq_optimizer_enabled);
 #elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 5
   data_ops_ = new DataOps(graph_viewer_, V_2024_5, device_type_, npu_qdq_optimizer_enabled);
+#elif OPENVINO_VERSION_MAJOR == 2025 && OPENVINO_VERSION_MINOR == 0
+  data_ops_ = new DataOps(graph_viewer_, V_2025_0, device_type_, npu_qdq_optimizer_enabled);
 #else
-  data_ops_ = new DataOps(graph_viewer_, V_2024_5, device_type_, npu_qdq_optimizer_enabled);
+  data_ops_ = new DataOps(graph_viewer_, V_2025_0, device_type_, npu_qdq_optimizer_enabled);
 #endif
 }
diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
index b2c5fd6f83167..f118f057ac11e 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
@@ -355,6 +355,7 @@ void DataOps::populate_op_mode_supported() {
   no_dimension_supported_.push_back({"Floor", V_2020_4, {"All"}});
   no_dimension_supported_.push_back({"Gather", V_2020_4, {"All"}});
   no_dimension_supported_.push_back({"Identity", V_2023_0, {"All"}});
+  no_dimension_supported_.push_back({"If", V_2022_3, {"CPU", "GPU"}});
   no_dimension_supported_.push_back({"Less", V_2022_1, {"CPU"}});
   no_dimension_supported_.push_back({"Loop", V_2021_4, {"All"}});
   no_dimension_supported_.push_back({"Min", V_2020_4, {"All"}});
@@ -387,7 +388,7 @@ void DataOps::populate_op_mode_supported() {

   // populate unsupportedmode_t
   {
-    UnsupportedOpMode obj = {{V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5},
+    UnsupportedOpMode obj = {{V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5, V_2025_0},
                              [this](const Node* node, const InitializedTensorSet&) {
                                // If the Input of ReduceMax op is UINT8, it is rejected (Due to output mismatch)
                                for (size_t i = 0; i < node->InputDefs().size(); i++) {
@@ -402,7 +403,8 @@ void DataOps::populate_op_mode_supported() {
     op_list_.insert({"ReduceMax", obj});
   }
   {
-    UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5},
+    UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2,
+                              V_2024_3, V_2024_4, V_2024_5, V_2025_0},
                              [this](const Node* node, const InitializedTensorSet&) {
                                const auto& input_arg = node->InputDefs()[1];
                                auto shape = input_arg->Shape();
@@ -419,7 +421,8 @@ void DataOps::populate_op_mode_supported() {
     op_list_.insert({"Reshape", obj});
   }
   {
-    UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5},
+    UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2,
+                              V_2024_3, V_2024_4, V_2024_5, V_2025_0},
                              [this](const Node* node, const InitializedTensorSet&) {
                                // If the operator is unsqueeze
                                // If axes is an input, then we cannot produce a static graph.
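`data_ops.cc` keeps, per operator, a list of releases in which a rejection predicate applies; the hunks above simply extend each list with `V_2025_0` so the existing rules keep firing on OpenVINO 2025.0. The mechanism, distilled into a runnable sketch (field and version names are illustrative):

```python
from dataclasses import dataclass
from typing import Callable, FrozenSet

@dataclass(frozen=True)
class UnsupportedOpMode:
    versions: FrozenSet[str]         # releases the predicate applies to
    rejects: Callable[[dict], bool]  # True -> node is not offloaded to OpenVINO

# e.g. reject ReduceMax on uint8 input, mirroring the hunk above
reduce_max = UnsupportedOpMode(
    versions=frozenset({"V_2024_4", "V_2024_5", "V_2025_0"}),
    rejects=lambda node: node.get("input_dtype") == "uint8",
)

def is_rejected(rule: UnsupportedOpMode, current_version: str, node: dict) -> bool:
    return current_version in rule.versions and rule.rejects(node)

assert is_rejected(reduce_max, "V_2025_0", {"input_dtype": "uint8"})
assert not is_rejected(reduce_max, "V_2024_3", {"input_dtype": "uint8"})
```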
@@ -434,7 +437,8 @@ void DataOps::populate_op_mode_supported() {
     op_list_.insert({"Unsqueeze", obj});
   }
   {
-    UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5},
+    UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5,
+                              V_2025_0},
                              [this](const Node* node, const InitializedTensorSet&) {
                                // check for attributes
                                auto& upsample_attr = node->GetAttributes();
diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h
index a2db56deca7cd..07fa36f355d55 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h
+++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h
@@ -32,7 +32,8 @@ enum versionNum {
   V_2024_2,
   V_2024_3,
   V_2024_4,
-  V_2024_5
+  V_2024_5,
+  V_2025_0
 };

 using VersionNum = enum versionNum;
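The QDQ-stripping fix called out in the PR description lands in the diff that follows: a DQ node feeding `Add` is now kept only when the QDQ data type is not 16-bit, so 16-bit QDQ pairs around `Add` get stripped on NPU. The new rule, distilled (names and dtype strings are illustrative):

```python
# Distilled from the DQFeedsASupportedOp change in the qdq_stripping.cc diff below:
# a DQ feeding Add is kept only for non-16-bit QDQ data types.
def dq_feeding_add_is_kept(qdq_dtype: str) -> bool:
    return qdq_dtype not in {"uint16", "int16"}

assert dq_feeding_add_is_kept("uint8")       # 8-bit QDQ around Add survives
assert not dq_feeding_add_is_kept("uint16")  # 16-bit QDQ around Add is stripped
```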
diff --git a/onnxruntime/core/providers/openvino/qdq_transformations/qdq_stripping.cc b/onnxruntime/core/providers/openvino/qdq_transformations/qdq_stripping.cc
index decfe91c598be..387aaf9985b4c 100644
--- a/onnxruntime/core/providers/openvino/qdq_transformations/qdq_stripping.cc
+++ b/onnxruntime/core/providers/openvino/qdq_transformations/qdq_stripping.cc
@@ -30,6 +30,10 @@ constexpr std::string_view DuplicateDQ = "/duplicated";

 constexpr ONNX_NAMESPACE::TensorProto_DataType DT_UINT16 = ONNX_NAMESPACE::TensorProto_DataType_UINT16;
 constexpr ONNX_NAMESPACE::TensorProto_DataType DT_INT16 = ONNX_NAMESPACE::TensorProto_DataType_INT16;
+constexpr ONNX_NAMESPACE::TensorProto_DataType DT_UINT8 = ONNX_NAMESPACE::TensorProto_DataType_UINT8;
+constexpr ONNX_NAMESPACE::TensorProto_DataType DT_INT8 = ONNX_NAMESPACE::TensorProto_DataType_INT8;
+constexpr ONNX_NAMESPACE::TensorProto_DataType DT_UINT4 = ONNX_NAMESPACE::TensorProto_DataType_UINT4;
+constexpr ONNX_NAMESPACE::TensorProto_DataType DT_INT4 = ONNX_NAMESPACE::TensorProto_DataType_INT4;

 // Return the data type of the qdq node.
 // Check output type of Q and input type of DQ to determine it as zero_point is an optional input and may not exist
@@ -218,7 +222,7 @@ static bool DQFeedsASupportedOp(const Node* dq_node) {
       } else {
         return true;
       }
-    } else if (op_type == "Add") {
+    } else if (op_type == "Add" && !(GetQDQDataType(dq_node) == DT_UINT16 || GetQDQDataType(dq_node) == DT_INT16)) {
       // Add => keeps all DQs
       return true;
     }
diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py
index 6ee37b8b0519e..3527a89ca7a7b 100644
--- a/tools/ci_build/build.py
+++ b/tools/ci_build/build.py
@@ -2332,7 +2332,7 @@ def build_nuget_package(
         target_name = "/t:CreateWindowsAIPackage"
     elif use_openvino:
         execution_provider = "/p:ExecutionProvider=openvino"
-        package_name = "/p:OrtPackageId=Microsoft.ML.OnnxRuntime.OpenVino"
+        package_name = "/p:OrtPackageId=Intel.ML.OnnxRuntime.OpenVino"
     elif use_tensorrt:
         execution_provider = "/p:ExecutionProvider=tensorrt"
         package_name = "/p:OrtPackageId=Microsoft.ML.OnnxRuntime.TensorRT"
diff --git a/tools/ci_build/github/azure-pipelines/linux-openvino-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-openvino-ci-pipeline.yml
index 9ee589a3d6ef3..c7b814f3dd52c 100644
--- a/tools/ci_build/github/azure-pipelines/linux-openvino-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/linux-openvino-ci-pipeline.yml
@@ -33,5 +33,5 @@ jobs:
     parameters:
       AgentPool : 'Linux-CPU-2019'
       JobName: 'Linux_CI_Dev'
-      RunDockerBuildArgs: '-o ubuntu22.04 -p 3.10 -d openvino -v 2024.4.0 -x "--use_openvino CPU --build_wheel"'
+      RunDockerBuildArgs: '-o ubuntu22.04 -p 3.10 -d openvino -v 2024.5.0 -x "--use_openvino CPU --build_wheel"'
       TimeoutInMinutes: 120
diff --git a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_openvino b/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_openvino
index 8f3dcb69d6c56..643c0d66d01f5 100644
--- a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_openvino
+++ b/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_openvino
@@ -1,7 +1,7 @@
 ARG UBUNTU_VERSION=22.04
 FROM ubuntu:${UBUNTU_VERSION}

-ARG OPENVINO_VERSION=2024.4.0
+ARG OPENVINO_VERSION=2024.5.0
 ARG PYTHON_VERSION=3.10

 ADD scripts /tmp/scripts
@@ -19,9 +19,9 @@ ENV IE_PLUGINS_PATH=$INTEL_OPENVINO_DIR/runtime/lib/intel64
 ENV DEBIAN_FRONTEND=noninteractive

 RUN cd /opt && mkdir -p intel && cd intel && \
-    wget https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.4/linux/l_openvino_toolkit_ubuntu22_2024.4.0.16579.c3152d32c9c_x86_64.tgz && \
-    tar xzf l_openvino_toolkit_ubuntu22_2024.4.0.16579.c3152d32c9c_x86_64.tgz && rm -rf l_openvino_toolkit_ubuntu22_2024.4.0.16579.c3152d32c9c_x86_64.tgz && \
-    mv l_openvino_toolkit_ubuntu22_2024.4.0.16579.c3152d32c9c_x86_64 openvino_2024.4.0 && \
+    wget https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.5/linux/l_openvino_toolkit_ubuntu22_2024.5.0.17288.7975fa5da0c_x86_64.tgz && \
+    tar xzf l_openvino_toolkit_ubuntu22_2024.5.0.17288.7975fa5da0c_x86_64.tgz && rm -rf l_openvino_toolkit_ubuntu22_2024.5.0.17288.7975fa5da0c_x86_64.tgz && \
+    mv l_openvino_toolkit_ubuntu22_2024.5.0.17288.7975fa5da0c_x86_64 openvino_2024.5.0 && \
     cd $INTEL_OPENVINO_DIR/install_dependencies && ./install_openvino_dependencies.sh -y

 WORKDIR /root
diff --git a/tools/nuget/generate_nuspec_for_native_nuget.py b/tools/nuget/generate_nuspec_for_native_nuget.py
index ba125f4e2d980..11842f34ce45b 100644
--- a/tools/nuget/generate_nuspec_for_native_nuget.py
+++ b/tools/nuget/generate_nuspec_for_native_nuget.py
@@ -182,6 +182,8 @@ def generate_description(line_list, package_name):
         description = "This package contains Linux native shared library artifacts for ONNX Runtime with CUDA."
     elif "Microsoft.ML.OnnxRuntime.Gpu.Windows" in package_name:
         description = "This package contains Windows native shared library artifacts for ONNX Runtime with CUDA."
+    elif "Intel.ML.OnnxRuntime" in package_name:
+        description = "This package contains native shared library artifacts for ONNX Runtime with OpenVINO."
     elif "Microsoft.ML.OnnxRuntime" in package_name:  # This is a Microsoft.ML.OnnxRuntime.* package
         description = (
             "This package contains native shared library artifacts for all supported platforms of ONNX Runtime."
@@ -715,7 +717,7 @@ def generate_files(line_list, args):
         )

     if args.execution_provider == "openvino":
-        get_env_var("INTEL_OPENVINO_DIR")
+        openvino_path = get_env_var("INTEL_OPENVINO_DIR")
         files_list.append(
             "'
         )
+        if is_windows():
+            dll_list_path = os.path.join(openvino_path, "runtime\\bin\\intel64\\Release\\")
+            tbb_list_path = os.path.join(openvino_path, "runtime\\3rdparty\\tbb\\bin\\")
+            for dll_element in os.listdir(dll_list_path):
+                if dll_element.endswith("dll"):
+                    files_list.append(
+                        "'
+                    )
+            for tbb_element in os.listdir(tbb_list_path):
+                if tbb_element.endswith("dll"):
+                    files_list.append(
+                        "'
+                    )
+
     if args.execution_provider == "cuda" or is_cuda_gpu_win_sub_package and not is_ado_packaging_build:
         files_list.append(
             "
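The nuspec change above bundles the OpenVINO runtime and TBB DLLs into the Windows NuGet package by enumerating two directories under `INTEL_OPENVINO_DIR` and appending a `file` entry for each DLL. A standalone sketch of that discovery step; the directory layout mirrors the diff, and the environment variable is assumed to point at an OpenVINO install (e.g. after running `setupvars`):

```python
import os

openvino_path = os.environ["INTEL_OPENVINO_DIR"]
dll_dir = os.path.join(openvino_path, "runtime", "bin", "intel64", "Release")
tbb_dir = os.path.join(openvino_path, "runtime", "3rdparty", "tbb", "bin")

dlls = [
    os.path.join(d, name)
    for d in (dll_dir, tbb_dir)
    for name in os.listdir(d)
    if name.endswith("dll")
]
print(f"packing {len(dlls)} OpenVINO/TBB DLLs into the NuGet package")
```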