Openvino ep ort 23.1 (microsoft#17911)
### Description
Integration with OpenVINO 2023.1.


### Motivation and Context

- Alignment with the latest OpenVINO version.
- Device name changed from VPUX to NPU, and NPU removed from the supported-device list
until official public support is available (see the device-enumeration sketch below).
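
As context for the rename (not part of this commit), a minimal OpenVINO 2.0 C++ sketch that simply enumerates the device names an OpenVINO 2023.1 build exposes; on machines with the accelerator driver installed, the device is reported as `NPU` where older releases used `VPUX`:

```cpp
#include <iostream>
#include <openvino/openvino.hpp>

int main() {
  ov::Core core;
  // Enumerate the plugins/devices this OpenVINO build can see, e.g. "CPU", "GPU", "NPU".
  for (const auto& device : core.get_available_devices()) {
    std::cout << device << '\n';
  }
  return 0;
}
```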

---------

Co-authored-by: Sahar Fatima <[email protected]>
Co-authored-by: Saurabh Kale <[email protected]>
Co-authored-by: Suryaprakash Shanmugam <[email protected]>
Co-authored-by: sfatimar <[email protected]>
5 people authored Nov 1, 2023
1 parent 69f0297 commit d87216b
Showing 35 changed files with 564 additions and 358 deletions.
18 changes: 0 additions & 18 deletions cmake/CMakeLists.txt
@@ -1282,14 +1282,6 @@ if (onnxruntime_USE_OPENVINO)
add_definitions(-DOPENVINO_CONFIG_CPU_FP16=1)
endif()

if (onnxruntime_USE_OPENVINO_VPUX_FP16)
add_definitions(-DOPENVINO_CONFIG_VPUX_FP16=1)
endif()

if (onnxruntime_USE_OPENVINO_VPUX_U8)
add_definitions(-DOPENVINO_CONFIG_VPUX_U8=1)
endif()

if (onnxruntime_USE_OPENVINO_GPU_FP32_NP)
add_definitions(-DOPENVINO_CONFIG_GPU_FP32=1)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
@@ -1310,16 +1302,6 @@ if (onnxruntime_USE_OPENVINO)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
endif()

if (onnxruntime_USE_OPENVINO_VPUX_FP32_NP)
add_definitions(-DOPENVINO_CONFIG_VPUX_FP32=1)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
endif()

if (onnxruntime_USE_OPENVINO_VPUX_FP16_NP)
add_definitions(-DOPENVINO_CONFIG_VPUX_FP16=1)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
endif()

if (onnxruntime_USE_OPENVINO_HETERO)
add_definitions(-DOPENVINO_CONFIG_HETERO=1)
add_definitions(-DDEVICE_NAME="${onnxruntime_USE_OPENVINO_DEVICE}")
2 changes: 0 additions & 2 deletions docs/python/ReadMeOV.rst
@@ -7,7 +7,6 @@ OpenVINO™ Execution Provider for ONNX Runtime accelerates inference across man
- Intel® CPUs
- Intel® integrated GPUs
- Intel® discrete GPUs
- Intel® integrated VPUs

Installation
------------
@@ -22,7 +21,6 @@ This package supports:
- Intel® CPUs
- Intel® integrated GPUs
- Intel® discrete GPUs
- Intel® integrated VPUs

``pip3 install onnxruntime-openvino``

4 changes: 2 additions & 2 deletions include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -611,7 +611,7 @@ typedef struct OrtMIGraphXProviderOptions {
typedef struct OrtOpenVINOProviderOptions {
#ifdef __cplusplus
OrtOpenVINOProviderOptions() : device_type{},
enable_vpu_fast_compile{},
enable_npu_fast_compile{},
device_id{},
num_of_threads{},
cache_dir{},
@@ -624,7 +624,7 @@ typedef struct OrtOpenVINOProviderOptions {
* Valid settings are one of: "CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16"
*/
const char* device_type;
unsigned char enable_vpu_fast_compile; ///< 0 = disabled, nonzero = enabled
unsigned char enable_npu_fast_compile; ///< 0 = disabled, nonzero = enabled
const char* device_id;
size_t num_of_threads; ///< 0 = Use default number of threads
const char* cache_dir; // path is set to empty by default
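
For reference, a hedged usage sketch (not part of this diff) showing how an application would populate the renamed field through the public C++ wrapper; `Ort::SessionOptions::AppendExecutionProvider_OpenVINO` is assumed to be available here, and `model.onnx` is a hypothetical path:

```cpp
#include <onnxruntime_cxx_api.h>

int main() {
  Ort::Env env{ORT_LOGGING_LEVEL_WARNING, "ov-ep-example"};
  Ort::SessionOptions session_options;

  OrtOpenVINOProviderOptions options{};
  options.device_type = "CPU_FP32";     // one of the documented device_type strings
  options.enable_npu_fast_compile = 0;  // renamed from enable_vpu_fast_compile
  options.num_of_threads = 0;           // 0 = use the default number of threads
  session_options.AppendExecutionProvider_OpenVINO(options);

  Ort::Session session{env, "model.onnx", session_options};  // hypothetical model path
  return 0;
}
```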
24 changes: 16 additions & 8 deletions onnxruntime/core/providers/openvino/backend_manager.cc
@@ -2,9 +2,7 @@
// Licensed under the MIT License

#include <fstream>
#include <vector>
#include <string>
#include <memory>
#include <utility>

#include "core/providers/shared_library/provider_api.h"
#include "contexts.h"
@@ -18,7 +16,8 @@ namespace openvino_ep {
static std::unique_ptr<GlobalContext> g_global_context;

GlobalContext& BackendManager::GetGlobalContext() {
// This is not thread safe to call for the first time, but it is first called on the main thread by the constructor so it is safe.
// This is not thread safe to call for the first time,
// but it is first called on the main thread by the constructor so it is safe.
if (!g_global_context)
g_global_context = std::make_unique<GlobalContext>();
return *g_global_context;
@@ -88,7 +87,9 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,
<< "Backend created for graph " << subgraph_context_.subgraph_name;
}
} else {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has concrete input dims. Initializing backend for graph " << subgraph_context_.subgraph_name;
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has concrete input dims. "
<< "Initializing backend for graph "
<< subgraph_context_.subgraph_name;

subgraph_context_.has_dynamic_input_shape = false;
try {
@@ -104,7 +105,7 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,
bool BackendManager::ModelHasBatchedInputs(const ONNX_NAMESPACE::ModelProto& model_proto) const {
bool has_batched_inputs = true;

for (int i = 0; i < (int)subgraph_context_.input_indexes.size(); i++) {
for (int i = 0; i < static_cast<int>(subgraph_context_.input_indexes.size()); i++) {
auto& input = model_proto.graph().input(subgraph_context_.input_indexes[i]);

// Batch-process only raw image inputs (NCHW or NHWC layouts)
@@ -215,7 +216,10 @@ BackendManager::ReWriteInputShapeInfo(const ONNX_NAMESPACE::ModelProto& model_pr
auto graph_proto = model_copy->mutable_graph();

for (size_t i = 0, limit = input_shapes.size(); i < limit; i++) {
auto g_in_shape = graph_proto->mutable_input((int)i)->mutable_type()->mutable_tensor_type()->mutable_shape();
auto g_in_shape = graph_proto->mutable_input(static_cast<int>(i))
->mutable_type()
->mutable_tensor_type()
->mutable_shape();
g_in_shape->clear_dim();
const auto& shape = input_shapes[i];
for (size_t dim = 0, end = shape.size(); dim < end; dim++) {
@@ -234,7 +238,11 @@ BackendManager::ReWriteBatchDimWithOne(const ONNX_NAMESPACE::ModelProto& model_p
auto graph_proto = model_copy->mutable_graph();

for (int i = 0; i < graph_proto->input_size(); i++) {
ONNX_NAMESPACE::TensorShapeProto* g_in_shape = graph_proto->mutable_input((int)i)->mutable_type()->mutable_tensor_type()->mutable_shape();
ONNX_NAMESPACE::TensorShapeProto* g_in_shape =
graph_proto->mutable_input(static_cast<int>(i))
->mutable_type()
->mutable_tensor_type()
->mutable_shape();
g_in_shape->mutable_dim(0)->clear_dim_value();
g_in_shape->mutable_dim(0)->set_dim_value(1);
}
13 changes: 11 additions & 2 deletions onnxruntime/core/providers/openvino/backend_manager.h
@@ -3,6 +3,11 @@

#pragma once

#include <vector>
#include <map>
#include <memory>
#include <string>

#include "ov_interface.h"
#include "contexts.h"
#include "ibackend.h"
@@ -13,15 +18,19 @@ namespace openvino_ep {
// Singleton class that manages all the backends
class BackendManager {
public:
BackendManager(const onnxruntime::Node& fused_node, const onnxruntime::GraphViewer& subgraph, const logging::Logger& logger);
BackendManager(const onnxruntime::Node& fused_node,
const onnxruntime::GraphViewer& subgraph,
const logging::Logger& logger);
void Compute(OrtKernelContext* context);
void ShutdownBackendManager();
static GlobalContext& GetGlobalContext();
static void ReleaseGlobalContext();

private:
std::unique_ptr<ONNX_NAMESPACE::ModelProto> GetModelProtoFromFusedNode(
const onnxruntime::Node& fused_node, const onnxruntime::GraphViewer& subgraph, const logging::Logger& logger) const;
const onnxruntime::Node& fused_node,
const onnxruntime::GraphViewer& subgraph,
const logging::Logger& logger) const;
bool ModelHasSymbolicInputDims(const onnxruntime::GraphViewer& subgraph) const;
bool ModelHasBatchedInputs(const ONNX_NAMESPACE::ModelProto& model_proto) const;

11 changes: 5 additions & 6 deletions onnxruntime/core/providers/openvino/backend_utils.cc
@@ -1,9 +1,7 @@
// Copyright (C) 2019-2022 Intel Corporation
// Licensed under the MIT License

#include <map>
#include <string>
#include <memory>
#include <algorithm>
#include <sstream>
#include <fstream>

@@ -58,7 +56,7 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
try {
auto cnn_network = global_context.ie_core.ReadModel(model);
if ((subgraph_context.precision == "FP16") &&
(global_context.device_type.find("VPUX") == std::string::npos)) {
(global_context.device_type.find("NPU") == std::string::npos)) {
// FP16 transformations
ov::pass::ConvertFP32ToFP16 pass_obj;
pass_obj.run_on_model(cnn_network);
@@ -88,7 +86,8 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
size_t index = results.size() - 1;

for (auto it = results.rbegin(); it != results.rend(); ++it) {
if (auto const_node = std::dynamic_pointer_cast<ov::op::v0::Constant>((*it)->input_value(0).get_node_shared_ptr())) {
if (auto const_node =
std::dynamic_pointer_cast<ov::op::v0::Constant>((*it)->input_value(0).get_node_shared_ptr())) {
const_outputs_map[(*it)->get_friendly_name()] = const_node;
results.erase(results.begin() + index);
}
@@ -254,7 +253,7 @@ void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor,

void printPerformanceCounts(const std::vector<OVProfilingInfo>& performanceMap,
std::ostream& stream, std::string deviceName) {
long long totalTime = 0;
int64_t totalTime = 0;
// Print performance counts
stream << std::endl
<< "performance counts:" << std::endl
12 changes: 10 additions & 2 deletions onnxruntime/core/providers/openvino/backend_utils.h
@@ -4,9 +4,15 @@
#pragma once

#define ORT_API_MANUAL_INIT
#include <iomanip>
#include <unordered_map>
#include <map>
#include <memory>
#include <vector>
#include <string>

#include "core/session/onnxruntime_cxx_api.h"
#include "contexts.h"
#include <iomanip>
#include "ov_interface.h"
#ifdef _WIN32
#include <direct.h>
@@ -57,7 +63,9 @@ void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor,
size_t batch_slice_idx);

std::shared_ptr<OVNetwork>
CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context, const SubGraphContext& subgraph_context,
CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto,
const GlobalContext& global_context,
const SubGraphContext& subgraph_context,
std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);

void printPerformanceCounts(const std::vector<OVProfilingInfo>& performanceMap,
@@ -16,7 +16,7 @@ BackendFactory::MakeBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
const SubGraphContext& subgraph_context) {
std::string type = global_context.device_type;
if (type == "CPU" || type.find("GPU") != std::string::npos ||
type.find("VPUX") != std::string::npos ||
type.find("NPU") != std::string::npos ||
type.find("HETERO") != std::string::npos ||
type.find("MULTI") != std::string::npos ||
type.find("AUTO") != std::string::npos) {
(Diff truncated; the remaining changed files are not shown here.)
