Skip to content

Commit

Permalink
Openvino ep ort 5.0 (microsoft#15626)
Browse files Browse the repository at this point in the history
### Description
This PR adds VPU support to the OpenVINO Execution Provider.
Bug fixes for GPU and CPU.
Changes to the OpenVINO backend in the Serialized Model API for faster First
Inference Latency.
Deprecates HDDL-VADM and MYRIAD and removes the related code.
Adds support for OpenVINO 2023.0.
Adds Dynamic Shapes support for iGPU.

### Motivation and Context
- VPU is upcoming hardware that can provide AI acceleration for
client systems through OpenVINO.
<!-- - If it fixes an open issue, please link to the issue here. -->

---------

Signed-off-by: MaajidKhan <[email protected]>
Co-authored-by: Suryaprakash Shanmugam <[email protected]>
Co-authored-by: MaajidKhan <[email protected]>
Co-authored-by: Preetha Veeramalai <[email protected]>
  • Loading branch information
4 people authored and ShukantPal committed May 7, 2023
1 parent e1ee85b commit 463fc87
Show file tree
Hide file tree
Showing 36 changed files with 704 additions and 1,301 deletions.
32 changes: 13 additions & 19 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1139,17 +1139,16 @@ if (onnxruntime_USE_OPENVINO)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2022.3")
set(OPENVINO_VERSION "2022.3")
add_definitions(-DOPENVINO_2022_3=1)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.0")
set(OPENVINO_VERSION "2023.0")
add_definitions(-DOPENVINO_2023_0=1)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "openvino")
set(OPENVINO_VERSION "2022.3")
add_definitions(-DOPENVINO_2022_3=1)
set(OPENVINO_VERSION "2023.0")
add_definitions(-DOPENVINO_2023_0=1)
else()
message(FATAL_ERROR "Unsupported OpenVINO version: ${INTEL_OPENVINO_DIR}")
endif()

if (onnxruntime_USE_OPENVINO_MYRIAD)
add_definitions(-DOPENVINO_CONFIG_MYRIAD=1)
endif()

if (onnxruntime_USE_OPENVINO_GPU_FP32)
add_definitions(-DOPENVINO_CONFIG_GPU_FP32=1)
endif()
Expand All @@ -1166,17 +1165,12 @@ if (onnxruntime_USE_OPENVINO)
add_definitions(-DOPENVINO_CONFIG_CPU_FP16=1)
endif()

if (onnxruntime_USE_OPENVINO_VAD_M)
add_definitions(-DOPENVINO_CONFIG_VAD_M=1)
if (onnxruntime_USE_OPENVINO_VPUX_FP16)
add_definitions(-DOPENVINO_CONFIG_VPUX_FP16=1)
endif()

if (onnxruntime_USE_OPENVINO_VAD_F)
add_definitions(-DOPENVINO_CONFIG_VAD_F=1)
endif()

if (onnxruntime_USE_OPENVINO_MYRIAD_NP)
add_definitions(-DOPENVINO_CONFIG_MYRIAD=1)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
if (onnxruntime_USE_OPENVINO_VPUX_U8)
add_definitions(-DOPENVINO_CONFIG_VPUX_U8=1)
endif()

if (onnxruntime_USE_OPENVINO_GPU_FP32_NP)
Expand All @@ -1199,13 +1193,13 @@ if (onnxruntime_USE_OPENVINO)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
endif()

if (onnxruntime_USE_OPENVINO_VAD_M_NP)
add_definitions(-DOPENVINO_CONFIG_VAD_M=1)
if (onnxruntime_USE_OPENVINO_VPUX_FP32_NP)
add_definitions(-DOPENVINO_CONFIG_VPUX_FP32=1)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
endif()

if (onnxruntime_USE_OPENVINO_VAD_F_NP)
add_definitions(-DOPENVINO_CONFIG_VAD_F=1)
if (onnxruntime_USE_OPENVINO_VPUX_FP16_NP)
add_definitions(-DOPENVINO_CONFIG_VPUX_FP16=1)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
endif()

Expand Down
8 changes: 2 additions & 6 deletions docs/python/ReadMeOV.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@ OpenVINO™ Execution Provider for ONNX Runtime
OpenVINO™ Execution Provider for ONNX Runtime accelerates inference across many `AI models <https://github.com/onnx/models>`_ on a variety of Intel® hardware such as:
- Intel® CPUs
- Intel® integrated GPUs
- Intel® Movidius™ Vision Processing Units - referred to as VPU.

- Intel® discrete GPUs

Installation
------------
Expand All @@ -21,9 +20,6 @@ Requirements
This package supports:
- Intel® CPUs
- Intel® integrated GPUs
- Intel® Movidius™ Vision Processing Units (VPUs).

Please Note for VAD-M use Docker installation / Build from Source for Linux.

``pip3 install onnxruntime-openvino==1.13.1``

Expand All @@ -40,7 +36,7 @@ For more details on build and installation please refer to `Build <https://onnxr
Usage
^^^^^

By default, Intel® CPU is used to run inference. However, you can change the default option to either Intel® integrated GPU or Intel® VPU for AI inferencing.
By default, Intel® CPU is used to run inference. However, you can change the default option to either Intel® integrated or discrete GPU.
Invoke `the provider config device type argument <https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#summary-of-options>`_ to change the hardware on which inferencing is done.

For more API calls and environment variables, see `Usage <https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#configuration-options>`_.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ extern "C" {

/**
* \param device_type openvino device type and precision. Could be any of
* CPU_FP32, CPU_FP16, GPU_FP32, GPU_FP16, MYRIAD_FP16, VAD-M_FP16 or VAD-F_FP32.
* CPU_FP32, CPU_FP16, GPU_FP32, GPU_FP16
*/
ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_OpenVINO,
_In_ OrtSessionOptions* options, _In_ const char* device_type);
Expand Down
2 changes: 1 addition & 1 deletion include/onnxruntime/core/session/onnxruntime_c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,7 @@ typedef struct OrtOpenVINOProviderOptions {
#endif
/** \brief Device type string
*
* Valid settings are one of: "CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16", "MYRIAD_FP16", "VAD-M_FP16" or "VAD-F_FP32"
* Valid settings are one of: "CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16"
*/
const char* device_type;
unsigned char enable_vpu_fast_compile; ///< 0 = disabled, nonzero = enabled
Expand Down
53 changes: 9 additions & 44 deletions onnxruntime/core/providers/openvino/backend_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,
subgraph_context_.precision = InferenceEngine::Precision::FP32;
} else if (prec_str == "FP16") {
subgraph_context_.precision = InferenceEngine::Precision::FP16;
} else if (prec_str == "U8") {
subgraph_context_.precision = InferenceEngine::Precision::U8;
} else {
throw std::string("Invalid OpenVINO Precision type: " + prec_str);
}
Expand All @@ -54,14 +56,6 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,

auto graph_inputs = subgraph.GetInputs();
for (auto input : graph_inputs) {
if (GetGlobalContext().device_type.find("MYRIAD") != std::string::npos) {
auto shape = input->Shape();
if (shape != nullptr) {
if (shape->dim_size() != 4) {
subgraph_context_.set_vpu_config = true;
}
}
}
auto it = subgraph_context_.input_names.find(input->Name());
if (it == subgraph_context_.input_names.end()) {
throw std::string("Input not found in the input defs list");
Expand All @@ -79,30 +73,11 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,
subgraph_context_.subgraph_name = fused_node.Name();
model_proto_ = GetModelProtoFromFusedNode(fused_node, subgraph, logger);

if (ModelHasBatchedInputs(*model_proto_) &&
GetGlobalContext().is_wholly_supported_graph &&
GetGlobalContext().device_type.find("HDDL") != std::string::npos) {
subgraph_context_.enable_batching = true;
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model can be Batch inferenced \n";
auto model_copy = ReWriteBatchDimWithOne(*model_proto_);
try {
concrete_backend_ = BackendFactory::MakeBackend(*model_copy,
GetGlobalContext(),
subgraph_context_);
} catch (std::string const& msg) {
throw msg;
}
subgraph_context_.has_dynamic_input_shape = false;

} else if (ModelHasSymbolicInputDims(subgraph)) {
if (ModelHasSymbolicInputDims(subgraph)) {
subgraph_context_.has_dynamic_input_shape = true;
if (GetGlobalContext().device_type.find("MYRIAD") != std::string::npos) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims."
" Defering backend initialization and device_type is MYRIAD.";
}
if (GetGlobalContext().device_type.find("CPU") != std::string::npos) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims and "
<< "device_type is CPU.";
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims";
if (GetGlobalContext().device_type.find("CPU") != std::string::npos ||
GetGlobalContext().device_type.find("GPU") != std::string::npos) {
if (GetGlobalContext().enable_dynamic_shapes) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Starting backend initialization. "
<< "Creating backend Dynamic Shapes";
Expand All @@ -117,12 +92,8 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,
<< "Backend created for graph " << subgraph_context_.subgraph_name;
}
}
} else if (ModelHasSymbolicInputDims(subgraph) &&
GetGlobalContext().device_type.find("GPU") != std::string::npos) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims. Defering backend initialization";
subgraph_context_.has_dynamic_input_shape = true;
} else {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has concreate input dims. Initializing backend for graph " << subgraph_context_.subgraph_name;
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has concrete input dims. Initializing backend for graph " << subgraph_context_.subgraph_name;

subgraph_context_.has_dynamic_input_shape = false;
try {
Expand Down Expand Up @@ -287,20 +258,14 @@ void BackendManager::Compute(OrtKernelContext* context) {
#endif
bool use_dynamic_backend = true;
if (GetGlobalContext().enable_dynamic_shapes && subgraph_context_.has_dynamic_input_shape &&
GetGlobalContext().device_type.find("CPU") != std::string::npos) {
(GetGlobalContext().device_type.find("CPU") != std::string::npos ||
GetGlobalContext().device_type.find("GPU") != std::string::npos)) {
concrete_backend_->Infer(context);
use_dynamic_backend = false;
} else if (use_dynamic_backend && subgraph_context_.has_dynamic_input_shape) {
std::vector<std::vector<int64_t>> tensor_shapes = GetInputTensorShapes(ctx);
auto key = MakeMapKeyString(tensor_shapes, GetGlobalContext().device_type);

if (GetGlobalContext().device_type.find("MYRIAD") != std::string::npos) {
for (size_t i = 0; i < subgraph_context_.input_indexes.size(); i++) {
if (tensor_shapes[i].size() != 4)
subgraph_context_.set_vpu_config = true;
}
}

std::shared_ptr<IBackend> dynamic_backend;
auto search = backend_map_.find(key);
if (search == backend_map_.end()) {
Expand Down
12 changes: 4 additions & 8 deletions onnxruntime/core/providers/openvino/backend_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
try {
auto cnn_network = global_context.ie_core.ReadModel(model);
if ((subgraph_context.precision == InferenceEngine::Precision::FP16) &&
(global_context.device_type.find("MYRIAD") == std::string::npos)) {
(global_context.device_type.find("VPUX") == std::string::npos)) {
// FP16 transformations
ov::pass::ConvertFP32ToFP16 pass_obj;
pass_obj.run_on_model(cnn_network);
Expand Down Expand Up @@ -96,7 +96,7 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
}
}
#ifndef NDEBUG
#if defined OPENVINO_2022_3
#if defined(OPENVINO_2022_3) || (OPENVINO_2023_0)
if (IsDebugEnabled()) {
std::string name = cnn_network->get_friendly_name();
ov::pass::Serialize serializer(name + ".xml", name + ".bin");
Expand All @@ -111,7 +111,7 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
}
}

InferenceEngine::Precision ConvertPrecisionONNXToOpenVINO(const ONNX_NAMESPACE::TypeProto& onnx_type, std::string device) {
InferenceEngine::Precision ConvertPrecisionONNXToOpenVINO(const ONNX_NAMESPACE::TypeProto& onnx_type) {
ONNX_NAMESPACE::DataType type_string = ONNX_NAMESPACE::Utils::DataTypeUtils::ToType(onnx_type);
if (*type_string == "float" || *type_string == "tensor(float)") {
return InferenceEngine::Precision::FP32;
Expand All @@ -128,11 +128,7 @@ InferenceEngine::Precision ConvertPrecisionONNXToOpenVINO(const ONNX_NAMESPACE::
} else if (*type_string == "uint8" || *type_string == "tensor(uint8)") {
return InferenceEngine::Precision::U8;
} else if (*type_string == "bool" || *type_string == "tensor(bool)") {
if (device == "MYRIAD") {
return InferenceEngine::Precision::I32;
} else {
return InferenceEngine::Precision::U8;
}
return InferenceEngine::Precision::U8;
} else if (*type_string == "int64" || *type_string == "tensor(int64)") {
return InferenceEngine::Precision::I32;
} else {
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/openvino/backend_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ GetOutputTensor(Ort::KernelContext& context,
std::shared_ptr<ngraph::Node> node);

InferenceEngine::Precision
ConvertPrecisionONNXToOpenVINO(const ONNX_NAMESPACE::TypeProto& onnx_type, std::string device);
ConvertPrecisionONNXToOpenVINO(const ONNX_NAMESPACE::TypeProto& onnx_type);

Ort::UnownedValue
GetOutputTensor(Ort::KernelContext& context, size_t batch_size,
Expand Down
11 changes: 5 additions & 6 deletions onnxruntime/core/providers/openvino/backends/backend_factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
#include "core/providers/openvino/contexts.h"
#include "core/providers/openvino/ibackend.h"
#include "basic_backend.h"
#include "vadm_backend.h"

namespace onnxruntime {
namespace openvino_ep {
Expand All @@ -16,11 +15,11 @@ BackendFactory::MakeBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
GlobalContext& global_context,
const SubGraphContext& subgraph_context) {
std::string type = global_context.device_type;
if (type.find("HDDL") != std::string::npos) {
return std::make_shared<VADMBackend>(model_proto, global_context, subgraph_context);
} else if (type == "CPU" || type.find("GPU") != std::string::npos || type == "MYRIAD" ||
type.find("HETERO") != std::string::npos || type.find("MULTI") != std::string::npos ||
type.find("AUTO") != std::string::npos) {
if (type == "CPU" || type.find("GPU") != std::string::npos ||
type.find("VPUX") != std::string::npos ||
type.find("HETERO") != std::string::npos ||
type.find("MULTI") != std::string::npos ||
type.find("AUTO") != std::string::npos) {
std::shared_ptr<IBackend> concrete_backend_;
try {
concrete_backend_ = std::make_shared<BasicBackend>(model_proto, global_context, subgraph_context);
Expand Down
Loading

0 comments on commit 463fc87

Please sign in to comment.