OVEP 1.21.0 Development Updates #23080

Merged: 1 commit, Dec 12, 2024
4 changes: 2 additions & 2 deletions cmake/onnxruntime_providers_openvino.cmake
@@ -13,8 +13,8 @@

# Header paths
find_package(OpenVINO REQUIRED COMPONENTS Runtime ONNX)
-  if(OpenVINO_VERSION VERSION_LESS 2024.3)
-    message(FATAL_ERROR "OpenVINO 2024.3 and newer are supported. Please, use latest OpenVINO release")
+  if(OpenVINO_VERSION VERSION_LESS 2024.4)
+    message(FATAL_ERROR "OpenVINO 2024.4 and newer are supported. Please, use latest OpenVINO release")
endif()

if(OpenVINO_VERSION VERSION_GREATER_EQUAL 2024.4)
5 changes: 4 additions & 1 deletion onnxruntime/core/providers/openvino/backend_manager.cc
@@ -70,7 +70,10 @@ BackendManager::BackendManager(const GlobalContext& global_context,
i++;
}
subgraph_context_.subgraph_name = fused_node.Name();
-  auto model_proto = GetModelProtoFromFusedNode(fused_node, subgraph, logger);
+  std::unique_ptr<onnx::ModelProto> model_proto;
+  if (!ep_ctx_handle_.IsValidOVEPCtxGraph()) {
+    model_proto = GetModelProtoFromFusedNode(fused_node, subgraph, logger);
+  }
std::string device_type = openvino_ep::BackendManager::GetGlobalContext().device_type;

if (ModelHasSymbolicInputDims(subgraph)) {
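Net effect of this hunk: the fused-node ModelProto is only materialized when no valid precompiled EP-context graph is present, so the blob-import path skips ONNX serialization entirely. A minimal standalone sketch of the guard (type and function names below are illustrative stand-ins, not the actual OVEP symbols):

```cpp
#include <memory>
#include <string>

struct ModelProto { std::string bytes; };  // stand-in for onnx::ModelProto

// Build the proto only when there is no precompiled EP-context blob.
std::unique_ptr<ModelProto> PrepareModelProto(bool is_valid_ep_ctx_graph) {
  std::unique_ptr<ModelProto> model_proto;
  if (!is_valid_ep_ctx_graph) {
    model_proto = std::make_unique<ModelProto>();  // the expensive path
  }
  return model_proto;  // stays null on the blob-import path
}
```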
16 changes: 8 additions & 8 deletions onnxruntime/core/providers/openvino/backend_utils.cc
@@ -39,21 +39,21 @@ struct static_cast_int64 {
int64_t operator()(const T1& x) const { return static_cast<int64_t>(x); }
};

-std::shared_ptr<OVNetwork>
+std::shared_ptr<const OVNetwork>
CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context,
std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map) {
if (IsCILogEnabled()) {
std::cout << "CreateNgraphFunc" << std::endl;
}
const std::string model = model_proto.SerializeAsString();
try {
-    auto cnn_network = global_context.ie_core.ReadModel(model, global_context.onnx_model_path_name);
+    auto ov_model = global_context.ie_core.ReadModel(model, global_context.onnx_model_path_name);

// Check for Constant Folding
-    if (!global_context.is_wholly_supported_graph) {
+    if ((global_context.device_type != "NPU") && !global_context.is_wholly_supported_graph) {
ov::pass::ConstantFolding pass_const_obj;
-      pass_const_obj.run_on_model(cnn_network);
-      auto& results = const_cast<ov::ResultVector&>(cnn_network.get()->get_results());
+      pass_const_obj.run_on_model(ov_model);
+      auto& results = const_cast<ov::ResultVector&>(ov_model.get()->get_results());
size_t index = results.size() - 1;

for (auto it = results.rbegin(); it != results.rend(); ++it) {
@@ -67,12 +67,12 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
}
#ifndef NDEBUG
if (IsDebugEnabled()) {
-      std::string name = cnn_network->get_friendly_name();
+      std::string name = ov_model->get_friendly_name();
ov::pass::Serialize serializer(name + ".xml", name + ".bin");
-      serializer.run_on_model(cnn_network);
+      serializer.run_on_model(ov_model);
}
#endif
-    return cnn_network;
+    return ov_model;
} catch (std::string const& msg) {
ORT_THROW(msg);
}
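Besides renaming cnn_network to ov_model and returning shared_ptr<const OVNetwork>, this hunk stops running constant folding for NPU. For orientation, a minimal sketch of the folding step in isolation, assuming an OpenVINO 2024.x development environment (error handling elided):

```cpp
#include <memory>

#include <openvino/core/model.hpp>
#include <openvino/pass/constant_folding.hpp>

// Fold constant subexpressions in place, as CreateOVModel does for
// partially supported graphs on non-NPU device types.
void FoldConstants(const std::shared_ptr<ov::Model>& model) {
  ov::pass::ConstantFolding folding;
  folding.run_on_model(model);  // mutates the model in place
}
```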
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/openvino/backend_utils.h
@@ -60,7 +60,7 @@ void FillInputBlob(OVTensorPtr inputBlob, size_t batch_slice_idx,
void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor,
size_t batch_slice_idx);

-std::shared_ptr<OVNetwork>
+std::shared_ptr<const OVNetwork>
CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto,
const GlobalContext& global_context,
std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);
57 changes: 48 additions & 9 deletions onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -48,6 +48,16 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
// Set the inference_num_threads property of the CPU
SetNumThreads(device_config);

+  auto npuw_status =
+      std::any_of(device_config.begin(), device_config.end(), [&](const std::pair<std::string, ov::Any>& pair) {
+        return (pair.first.find("NPU_USE_NPUW") != std::string::npos) && (pair.second.is<std::string>()) &&
+               (pair.second.as<std::string>() == "YES");
+      });
+
+  if (npuw_status) {
+    LOGS_DEFAULT(INFO) << log_tag << "NPUW Enabled during compilation";
+  }

try {
std::string dev_prec = global_context.device_type + "_" + global_context_.precision_str;

@@ -81,7 +91,6 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
device_config,
global_context_.ep_context_embed_mode,
subgraph_context_.subgraph_name);
-      ie_cnn_network_ = exe_network_.Get().get_runtime_model();
} else if (global_context_.export_ep_ctx_blob &&
hw_target.find("NPU") != std::string::npos &&
!global_context_.has_external_weights) {
@@ -106,15 +115,15 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
device_config,
subgraph_context_.subgraph_name);
} else { // For all other types use ov::Model Type
-      ie_cnn_network_ = CreateOVModel(*model_proto, global_context_, const_outputs_map_);
+      auto ov_model = CreateOVModel(*model_proto, global_context_, const_outputs_map_);
      exe_network_ = global_context_.ie_core.CompileModel(
-          ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
+          ov_model, hw_target, device_config, subgraph_context_.subgraph_name);
}
#endif
} else { // Full graph is not supported
-    ie_cnn_network_ = CreateOVModel(*model_proto, global_context_, const_outputs_map_);
+    auto ov_model = CreateOVModel(*model_proto, global_context_, const_outputs_map_);
    exe_network_ = global_context_.ie_core.CompileModel(
-        ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
+        ov_model, hw_target, device_config, subgraph_context_.subgraph_name);
}
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
} catch (const char* msg) {
@@ -145,8 +154,8 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
device_config.emplace(ov::hint::inference_precision("f32"));
}
if (global_context_.precision_str.find("ACCURACY") != std::string::npos &&
global_context_.device_type == "GPU") {
if (global_context_.OpenVINO_Version.at(0) >= 2024 && global_context_.OpenVINO_Version.at(1) >= 1) {
global_context_.device_type.find("GPU") != std::string::npos) {
if (global_context_.OpenVINO_Version.at(0) >= 2024) {
device_config.emplace(ov::hint::inference_precision(ov::element::undefined));
device_config.emplace(ov::hint::execution_mode(ov::hint::ExecutionMode::ACCURACY));
} else {
@@ -174,7 +183,7 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
device_property = std::make_pair("NPU_COMPILER_TYPE", env_npu_compiler_type);
}
device_config.emplace(ov::device::properties("NPU", device_property));
-#if (OPENVINO_VERSION_MAJOR >= 2024) && (OPENVINO_VERSION_MINOR > 3)
+#if (((OPENVINO_VERSION_MAJOR == 2024) && (OPENVINO_VERSION_MINOR > 3)) || (OPENVINO_VERSION_MAJOR > 2024))
if (global_context_.export_ep_ctx_blob) {
global_context_.ie_core.Get().set_property("NPU", ov::intel_npu::bypass_umd_caching(true));
}
@@ -184,6 +193,33 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
if (!global_context_.load_config.empty()) {
const std::map<std::string, ov::AnyMap>& target_config = global_context_.load_config;

if (global_context_.device_type.find("NPU") != std::string::npos) {
auto npuw_config = target_config.at("NPU");

// Check if "NPU_USE_NPUW" exists and is set to "YES"
auto npu_use_npuw_it = npuw_config.find("NPU_USE_NPUW");
if (npu_use_npuw_it != npuw_config.end() &&
npu_use_npuw_it->second.is<std::string>() &&
npu_use_npuw_it->second.as<std::string>() == "YES") {
// Only add NPUW-related keys if NPU_USE_NPUW is "YES"
for (const auto& [key, value] : npuw_config) {
if (key.find("NPUW") != std::string::npos) {
if (!value.is<std::string>()) {
LOGS_DEFAULT(ERROR) << "Invalid value type for key: " << key;
continue;
}
device_config[key] = value;
}
}
} else {
// Check if there are any "NPUW" keys and log a warning
if (std::any_of(npuw_config.begin(), npuw_config.end(),
[&](const auto& pair) { return pair.first.find("NPUW") != std::string::npos; })) {
LOGS_DEFAULT(WARNING) << "Skipping NPUW-related configurations as NPU_USE_NPUW is not set to 'YES'.";
}
}
}

// Parse device types like "AUTO:CPU,GPU" and extract individual devices
auto parse_individual_devices = [&](const std::string& device_type) -> std::vector<std::string> {
std::vector<std::string> devices;
@@ -213,6 +249,9 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
auto set_target_properties = [&](const std::string& device, const ov::AnyMap& config_options,
const std::vector<ov::PropertyName>& supported_properties) {
for (const auto& [key, value] : config_options) {
if (key.find("NPUW") != std::string::npos) {
continue;
}
if (is_supported_and_mutable(key, supported_properties)) {
global_context_.ie_core.Get().set_property(device, ov::AnyMap{{key, value}});
} else {
@@ -378,7 +417,7 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
if ((it == ort_ov_tensor_map.end()) ||
(it != ort_ov_tensor_map.end() && (it->second.ort_ptr != tensor.GetTensorRawData()))) {
ov_tensor_data_t ov_tensor_data;
-      auto input = graph_input_info.at(input_idx);
+      const auto& input = graph_input_info.at(input_idx);
ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(input.get_element_type(), input.get_shape(),
const_cast<void*>(tensor.GetTensorRawData()));

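The NPUW probe added at the top of the constructor is a plain std::any_of scan over the device-config map. The same check, reduced to a self-contained sketch that swaps ov::AnyMap for std::map<std::string, std::string>:

```cpp
#include <algorithm>
#include <iostream>
#include <map>
#include <string>

// True when any key containing "NPU_USE_NPUW" maps to "YES".
bool NpuwEnabled(const std::map<std::string, std::string>& config) {
  return std::any_of(config.begin(), config.end(), [](const auto& kv) {
    return kv.first.find("NPU_USE_NPUW") != std::string::npos &&
           kv.second == "YES";
  });
}

int main() {
  const std::map<std::string, std::string> cfg{{"NPU_USE_NPUW", "YES"}};
  std::cout << std::boolalpha << NpuwEnabled(cfg) << '\n';  // prints: true
}
```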
@@ -58,7 +58,6 @@ class BasicBackend : public IBackend {
GlobalContext& global_context_;
SubGraphContext subgraph_context_;
mutable std::mutex compute_lock_;
-  std::shared_ptr<const OVNetwork> ie_cnn_network_;
OVExeNetwork exe_network_;
std::map<std::string, std::shared_ptr<ov::Node>> const_outputs_map_;
std::unique_ptr<InferRequestsQueue> inferRequestsQueue_;
13 changes: 12 additions & 1 deletion onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc
@@ -99,14 +99,25 @@ Status EPCtxHandler::ImportBlobFromEPCtxModel(const GraphViewer& graph_viewer, b
auto node = graph_viewer.GetNode(0);
auto& attrs = node->GetAttributes();
ORT_ENFORCE(attrs.count(EP_CACHE_CONTEXT) > 0);
-  model_stream_ = std::make_shared<std::istringstream>(attrs.at(EP_CACHE_CONTEXT).s());

+  ep_cache_context_attribute_ = &attrs.at(EP_CACHE_CONTEXT);

ep_context_embed_mode = static_cast<bool>(attrs.at(EMBED_MODE).i());
LOGS_DEFAULT(VERBOSE) << "[OpenVINO EP] Read blob from EPContext Node";

is_valid_ep_ctx_graph_ = true;
return Status::OK();
}

+const std::string& EPCtxHandler::GetModelBlobStream() const {
+  static std::string empty;
+  if (ep_cache_context_attribute_ != nullptr) {
+    return ep_cache_context_attribute_->s();
+  } else {
+    return empty;
+  }
+}

bool EPCtxHandler::CheckForOVEPCtxNode(const GraphViewer& graph_viewer, std::string openvino_sdk_version) const {
for (int i = 0; i < graph_viewer.MaxNodeIndex(); ++i) {
auto node = graph_viewer.GetNode(i);
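GetModelBlobStream now returns a reference to the string already stored in the cached EPContext attribute instead of copying the blob into an istringstream; the function-local static keeps the returned reference valid even when no attribute was imported. The pattern in isolation (class name hypothetical):

```cpp
#include <string>

class BlobRef {
 public:
  void Set(const std::string* blob) { blob_ = blob; }

  // Never dangles: falls back to a function-local static when unset.
  const std::string& Get() const {
    static const std::string empty;
    return blob_ != nullptr ? *blob_ : empty;
  }

 private:
  const std::string* blob_ = nullptr;  // non-owning, like ep_cache_context_attribute_
};
```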
6 changes: 3 additions & 3 deletions onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h
@@ -23,7 +23,7 @@ static const char SOURCE[] = "source";
class EPCtxHandler {
public:
EPCtxHandler() = default;
-  EPCtxHandler(const EPCtxHandler&) = default;
+  EPCtxHandler(const EPCtxHandler&) = delete;
Status ExportEPCtxModel(const GraphViewer& graph_viewer,
const std::string& graph_name,
const logging::Logger& logger,
@@ -33,11 +33,11 @@ class EPCtxHandler {
Status ImportBlobFromEPCtxModel(const GraphViewer& graph_viewer, bool& ep_context_embed_mode);
bool CheckForOVEPCtxNode(const GraphViewer& graph_viewer, std::string openvino_sdk_version) const;
bool IsValidOVEPCtxGraph() const { return is_valid_ep_ctx_graph_; }
-  [[nodiscard]] const std::shared_ptr<std::istringstream> GetModelBlobStream() const { return model_stream_; }
+  const std::string& GetModelBlobStream() const;

private:
bool is_valid_ep_ctx_graph_{false};
-  std::shared_ptr<std::istringstream> model_stream_;
+  const onnx::AttributeProto* ep_cache_context_attribute_;
};

} // namespace openvino_ep
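Deleting the copy constructor fits the new member: a copied handler would share the non-owning ep_cache_context_attribute_ pointer into graph-owned storage. The idiom, sketched below; note the real class deletes only the copy constructor, and deleting copy assignment as well is shown here as an assumed companion:

```cpp
// A handle that aliases externally owned state is safest non-copyable,
// so two handles can never disagree about that state's lifetime.
class NonCopyableHandle {
 public:
  NonCopyableHandle() = default;
  NonCopyableHandle(const NonCopyableHandle&) = delete;             // as in EPCtxHandler
  NonCopyableHandle& operator=(const NonCopyableHandle&) = delete;  // assumed companion
};
```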
@@ -159,7 +159,7 @@ struct OpenVINOExecutionProviderInfo {
device_type_ = std::move(dev_type);
} else if (dev_type.find("HETERO") == 0 || dev_type.find("MULTI") == 0 || dev_type.find("AUTO") == 0) {
std::vector<std::string> devices = parseDevices(dev_type, available_devices);
-      device_type_ = dev_type;
+      device_type_ = std::move(dev_type);
} else {
ORT_THROW("Invalid device string: " + dev_type);
}
@@ -57,7 +57,7 @@ std::unique_ptr<IExecutionProvider> OpenVINOProviderFactory::CreateProvider() {
std::string so_cache_path = config_options_.GetConfigOrDefault(kOrtSessionOptionEpContextFilePath, "").c_str();

if (so_export_ep_ctx_blob && !so_cache_path.empty()) {
-    cache_dir_ = so_cache_path;
+    cache_dir_ = std::move(so_cache_path);
auto file_path = std::filesystem::path(cache_dir_);
// ep_context_file_path_ file extension must be .onnx
if (file_path.extension().generic_string() == ".onnx") {
@@ -248,7 +248,7 @@ struct OpenVINO_Provider : Provider {
LOGS_DEFAULT(WARNING) << "Unsupported JSON value type for key: " << inner_key << ". Skipping key.";
}
}
-          target_map[key] = inner_map;
+          target_map[key] = std::move(inner_map);
}
} catch (const nlohmann::json::parse_error& e) {
// Handle syntax errors in JSON
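The surrounding code (visible in the catch clause) parses the provider's load_config JSON with nlohmann::json and now moves each per-device option map into target_map instead of copying it. A simplified, self-contained sketch of that parsing shape, assuming the nlohmann/json single-header library and string-valued options only:

```cpp
#include <iostream>
#include <map>
#include <string>
#include <utility>

#include <nlohmann/json.hpp>

using ConfigMap = std::map<std::string, std::map<std::string, std::string>>;

// Turn {"NPU": {"NPU_USE_NPUW": "YES"}} into device -> (key -> value).
ConfigMap ParseLoadConfig(const std::string& text) {
  ConfigMap target_map;
  const auto json = nlohmann::json::parse(text);  // throws json::parse_error
  for (const auto& [device, options] : json.items()) {
    std::map<std::string, std::string> inner_map;
    for (const auto& [key, value] : options.items()) {
      if (value.is_string()) inner_map[key] = value.get<std::string>();
      // The real code logs and skips unsupported value types here.
    }
    target_map[device] = std::move(inner_map);  // avoid copying the map
  }
  return target_map;
}

int main() {
  auto cfg = ParseLoadConfig(R"({"NPU": {"NPU_USE_NPUW": "YES"}})");
  std::cout << cfg["NPU"]["NPU_USE_NPUW"] << '\n';  // prints: YES
}
```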
1 change: 0 additions & 1 deletion onnxruntime/core/providers/openvino/ov_allocator.cc
@@ -39,7 +39,6 @@ void* OVRTAllocator::Alloc(size_t size) {
} catch (const ov::Exception& e) {
ORT_THROW(std::string("Alloc failed: ") + e.what());
}
-  return nullptr;
}

void OVRTAllocator::Free(void* p) {
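The deleted return nullptr; was unreachable: ORT_THROW always throws, so control never falls through the catch block. If the throwing helper is marked [[noreturn]], the compiler can verify this; a sketch of that idea with a hypothetical helper standing in for ORT_THROW:

```cpp
#include <cstdlib>
#include <stdexcept>
#include <string>

// Hypothetical stand-in for ORT_THROW; [[noreturn]] tells the compiler
// no code after a call to it is reachable.
[[noreturn]] void ThrowError(const std::string& msg) {
  throw std::runtime_error(msg);
}

void* Alloc(std::size_t size) {
  if (void* p = std::malloc(size)) return p;
  ThrowError("Alloc failed");  // no trailing `return nullptr;` needed
}
```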
8 changes: 4 additions & 4 deletions onnxruntime/core/providers/openvino/ov_interface.cc
@@ -109,18 +109,18 @@ OVExeNetwork OVCore::CompileModel(const std::string& onnx_model,
}
}

-OVExeNetwork OVCore::ImportModel(std::shared_ptr<std::istringstream> model_stream,
+OVExeNetwork OVCore::ImportModel(const std::string& model_string,
std::string hw_target,
const ov::AnyMap& device_config,
bool embed_mode,
std::string name) {
try {
ov::CompiledModel obj;
if (embed_mode) {
-      obj = oe.import_model(*model_stream, hw_target, device_config);
+      std::istringstream model_stream(model_string);
+      obj = oe.import_model(model_stream, hw_target, device_config);
} else {
-      std::string blob_file_path = (*model_stream).str();
-      std::ifstream modelStream(blob_file_path, std::ios_base::binary | std::ios_base::in);
+      std::ifstream modelStream(model_string, std::ios_base::binary | std::ios_base::in);
obj = oe.import_model(modelStream,
hw_target,
{});
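After this change ImportModel receives the blob by const reference and interprets it two ways: in embed mode the string holds the blob bytes themselves (wrapped in a local istringstream), otherwise it is treated as a path to a blob file opened in binary mode. The dual interpretation, as a standalone sketch:

```cpp
#include <fstream>
#include <sstream>
#include <stdexcept>
#include <string>

// Yield the blob contents from either an in-memory string (embed mode)
// or a file path, mirroring the two branches in OVCore::ImportModel.
std::string ReadBlob(const std::string& model_string, bool embed_mode) {
  if (embed_mode) {
    return std::istringstream(model_string).str();  // bytes are inline
  }
  std::ifstream file(model_string, std::ios_base::binary | std::ios_base::in);
  if (!file) throw std::runtime_error("cannot open blob: " + model_string);
  std::ostringstream contents;
  contents << file.rdbuf();
  return contents.str();
}
```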
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/openvino/ov_interface.h
@@ -54,7 +54,7 @@ class OVCore {
ov::AnyMap& device_config,
const std::string& name);
// OV Interface for Import model Stream
-  OVExeNetwork ImportModel(std::shared_ptr<std::istringstream> model_stream,
+  OVExeNetwork ImportModel(const std::string& model_string,
std::string hw_target,
const ov::AnyMap& device_config,
bool embed_mode,
8 changes: 4 additions & 4 deletions onnxruntime/core/providers/openvino/ov_versions/capability.cc
@@ -35,14 +35,14 @@ GetCapability::GetCapability(const GraphViewer& graph_viewer_param,
device_type_ = "CPU";
if (enable_qdq_optimizer) npu_qdq_optimizer_enabled = true;
}
-#if OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 3
-  data_ops_ = new DataOps(graph_viewer_, V_2024_3, device_type_, npu_qdq_optimizer_enabled);
-#elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 4
+#if OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 4
data_ops_ = new DataOps(graph_viewer_, V_2024_4, device_type_, npu_qdq_optimizer_enabled);
#elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 5
data_ops_ = new DataOps(graph_viewer_, V_2024_5, device_type_, npu_qdq_optimizer_enabled);
+#elif OPENVINO_VERSION_MAJOR == 2025 && OPENVINO_VERSION_MINOR == 0
+  data_ops_ = new DataOps(graph_viewer_, V_2025_0, device_type_, npu_qdq_optimizer_enabled);
#else
-  data_ops_ = new DataOps(graph_viewer_, V_2024_5, device_type_, npu_qdq_optimizer_enabled);
+  data_ops_ = new DataOps(graph_viewer_, V_2025_0, device_type_, npu_qdq_optimizer_enabled);
#endif
}

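The fallback branch now selects the newest known op-support table (V_2025_0) instead of an older one when the build's OpenVINO version has no exact match. The dispatch pattern, reduced to a compilable sketch; the macro defaults below are assumptions for standalone compilation, since the real values come from the OpenVINO build:

```cpp
#ifndef OPENVINO_VERSION_MAJOR  // assumed defaults for this sketch only
#define OPENVINO_VERSION_MAJOR 2025
#define OPENVINO_VERSION_MINOR 0
#endif

enum VersionNum { V_2024_4, V_2024_5, V_2025_0 };

constexpr VersionNum SelectVersion() {
#if OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 4
  return V_2024_4;
#elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 5
  return V_2024_5;
#else
  return V_2025_0;  // unknown or newer releases get the latest table
#endif
}
```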
12 changes: 8 additions & 4 deletions onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
@@ -355,6 +355,7 @@ void DataOps::populate_op_mode_supported() {
no_dimension_supported_.push_back({"Floor", V_2020_4, {"All"}});
no_dimension_supported_.push_back({"Gather", V_2020_4, {"All"}});
no_dimension_supported_.push_back({"Identity", V_2023_0, {"All"}});
no_dimension_supported_.push_back({"If", V_2022_3, {"CPU", "GPU"}});
no_dimension_supported_.push_back({"Less", V_2022_1, {"CPU"}});
no_dimension_supported_.push_back({"Loop", V_2021_4, {"All"}});
no_dimension_supported_.push_back({"Min", V_2020_4, {"All"}});
@@ -387,7 +388,7 @@ void DataOps::populate_op_mode_supported() {

// populate unsupportedmode_t
{
-    UnsupportedOpMode obj = {{V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5},
+    UnsupportedOpMode obj = {{V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5, V_2025_0},
[this](const Node* node, const InitializedTensorSet&) {
// If the Input of ReduceMax op is UINT8, it is rejected (Due to output mismatch)
for (size_t i = 0; i < node->InputDefs().size(); i++) {
@@ -402,7 +403,8 @@ void DataOps::populate_op_mode_supported() {
op_list_.insert({"ReduceMax", obj});
}
{
-    UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5},
+    UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2,
+                              V_2024_3, V_2024_4, V_2024_5, V_2025_0},
[this](const Node* node, const InitializedTensorSet&) {
const auto& input_arg = node->InputDefs()[1];
auto shape = input_arg->Shape();
@@ -419,7 +421,8 @@ void DataOps::populate_op_mode_supported() {
op_list_.insert({"Reshape", obj});
}
{
-    UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5},
+    UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2,
+                              V_2024_3, V_2024_4, V_2024_5, V_2025_0},
[this](const Node* node, const InitializedTensorSet&) {
// If the operator is unsqueeze
// If axes is an input, then we cannot produce a static graph.
@@ -434,7 +437,8 @@ void DataOps::populate_op_mode_supported() {
op_list_.insert({"Unsqueeze", obj});
}
{
-    UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5,
+    UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5,
+                              V_2025_0},
[this](const Node* node, const InitializedTensorSet&) {
// check for attributes
auto& upsample_attr = node->GetAttributes();
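Each hunk in this file only appends V_2025_0 to the version list attached to an op-specific rejection predicate, so the existing rules carry forward to the new release. The registration shape, as a hedged self-contained sketch; the real op_list_ container and node types differ:

```cpp
#include <functional>
#include <map>
#include <string>
#include <vector>

enum Version { V_2024_5, V_2025_0 };

// Stand-in for UnsupportedOpMode: versions a rejection rule applies to,
// plus the predicate that decides whether a node is unsupported.
struct UnsupportedOpMode {
  std::vector<Version> versions;
  std::function<bool(int /*axes_rank*/)> rejected;
};

int main() {
  std::multimap<std::string, UnsupportedOpMode> op_list;
  op_list.insert({"Unsqueeze",
                  {{V_2024_5, V_2025_0},  // V_2025_0 newly appended
                   [](int axes_rank) { return axes_rank == 0; }}});
}
```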
3 changes: 2 additions & 1 deletion onnxruntime/core/providers/openvino/ov_versions/data_ops.h
@@ -32,7 +32,8 @@ enum versionNum {
V_2024_2,
V_2024_3,
V_2024_4,
-  V_2024_5
+  V_2024_5,
+  V_2025_0
};

using VersionNum = enum versionNum;