From 4d1963c2a2275a775eb52d5a578faa750d71b8c8 Mon Sep 17 00:00:00 2001 From: sfatimar Date: Fri, 19 Apr 2024 13:01:38 +0530 Subject: [PATCH] OpenVINO EP Rel 1.18 Changes (#20337) ### Description These changes include Support to OpenVINO 2024.1 Import PreCompiled Blobs with EPContext Blob Separate Device/Precision as input Deprecate CPU_FP32 , GPU_FP32 terminology , introduce CPU, GPU AUTO GPU, CPU will only create GPU Blob and not CPU Blob. ### Motivation and Context - OpenVINO 2024.1 will be out soon - Import Precompiled Blob can greatly reduce FEIL/FIL Time. - Separating Device/Precision will make the input cleaner - --------- Co-authored-by: Suryaprakash Shanmugam Co-authored-by: Preetha Veeramalai --- cmake/CMakeLists.txt | 34 +- dockerfiles/Dockerfile.openvino | 2 +- .../providers/openvino/backend_manager.cc | 77 +++- .../core/providers/openvino/backend_manager.h | 14 +- .../core/providers/openvino/backend_utils.cc | 4 +- .../core/providers/openvino/backend_utils.h | 4 +- .../openvino/backends/backend_factory.cc | 7 +- .../openvino/backends/basic_backend.cc | 103 ++++-- .../openvino/backends/basic_backend.h | 10 +- .../core/providers/openvino/contexts.h | 8 +- .../core/providers/openvino/ibackend.h | 5 +- .../openvino/onnx_ctx_model_helper.cc | 123 ++++++ .../openvino/onnx_ctx_model_helper.h | 45 +++ .../openvino/openvino_execution_provider.cc | 96 +++-- .../openvino/openvino_execution_provider.h | 92 ++--- .../openvino/openvino_provider_factory.cc | 156 ++++++-- .../core/providers/openvino/ov_interface.cc | 134 +++++-- .../core/providers/openvino/ov_interface.h | 34 +- .../openvino/ov_versions/capability.cc | 29 +- .../openvino/ov_versions/capability.h | 6 +- .../openvino/ov_versions/data_ops.cc | 349 +----------------- .../providers/openvino/ov_versions/data_ops.h | 12 +- .../core/session/provider_bridge_ort.cc | 6 +- .../python/onnxruntime_pybind_state.cc | 10 +- .../python/onnxruntime_pybind_state_common.h | 14 +- .../test/perftest/command_args_parser.cc | 2 +- onnxruntime/test/perftest/ort_test_session.cc | 59 ++- .../cpu/activation/activation_op_test.h | 2 +- .../test/providers/cpu/math/clip_test.cc | 2 +- .../cpu/math/element_wise_ops_test.cc | 26 +- .../test/providers/cpu/math/gemm_test.cc | 14 +- .../providers/cpu/nn/batch_norm_op_test.cc | 2 +- .../test/providers/cpu/nn/pool_op_test.cc | 2 +- .../cpu/reduction/reduction_ops_test.cc | 6 +- .../providers/cpu/tensor/scatter_op_test.cc | 2 +- .../providers/cpu/tensor/where_op_test.cc | 4 +- .../test/python/onnx_backend_test_series.py | 6 +- .../onnx_backend_test_series_filters.jsonc | 2 +- tools/ci_build/build.py | 28 +- .../linux-openvino-ci-pipeline.yml | 2 +- .../py-package-build-pipeline.yml | 2 +- .../py-packaging-selectable-stage.yml | 2 +- 42 files changed, 827 insertions(+), 710 deletions(-) create mode 100644 onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc create mode 100644 onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index a5cadc937e63d..1795052953d8c 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -1325,43 +1325,25 @@ if (onnxruntime_USE_OPENVINO) add_definitions(-DUSE_OPENVINO=1) - if (onnxruntime_USE_OPENVINO_GPU_FP32) - add_definitions(-DOPENVINO_CONFIG_GPU_FP32=1) + if (onnxruntime_USE_OPENVINO_GPU) + add_definitions(-DOPENVINO_CONFIG_GPU=1) endif() - if (onnxruntime_USE_OPENVINO_GPU_FP16) - add_definitions(-DOPENVINO_CONFIG_GPU_FP16=1) - endif() - - if (onnxruntime_USE_OPENVINO_CPU_FP32) - 
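At the session level, the deprecated combined strings (CPU_FP32, GPU_FP16, ...) are replaced by separate `device_type` and `precision` provider options, mirroring the CMake flag rename above. A minimal usage sketch, assuming an OpenVINO-enabled onnxruntime build; the model path is illustrative:

```python
import onnxruntime as ort

# Device and precision are now separate provider options; the deprecated
# combined strings ("CPU_FP32", "GPU_FP16", ...) are still parsed but warn.
ov_options = {"device_type": "GPU", "precision": "FP16"}

sess = ort.InferenceSession(
    "model.onnx",  # illustrative path
    providers=["OpenVINOExecutionProvider"],
    provider_options=[ov_options],
)
print(sess.get_providers())
```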
add_definitions(-DOPENVINO_CONFIG_CPU_FP32=1) - endif() - - if (onnxruntime_USE_OPENVINO_CPU_FP16) - add_definitions(-DOPENVINO_CONFIG_CPU_FP16=1) + if (onnxruntime_USE_OPENVINO_CPU) + add_definitions(-DOPENVINO_CONFIG_CPU=1) endif() if (onnxruntime_USE_OPENVINO_NPU) add_definitions(-DOPENVINO_CONFIG_NPU=1) endif() - if (onnxruntime_USE_OPENVINO_GPU_FP32_NP) - add_definitions(-DOPENVINO_CONFIG_GPU_FP32=1) - add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1) - endif() - - if (onnxruntime_USE_OPENVINO_GPU_FP16_NP) - add_definitions(-DOPENVINO_CONFIG_GPU_FP16=1) - add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1) - endif() - - if (onnxruntime_USE_OPENVINO_CPU_FP32_NP) - add_definitions(-DOPENVINO_CONFIG_CPU_FP32=1) + if (onnxruntime_USE_OPENVINO_GPU_NP) + add_definitions(-DOPENVINO_CONFIG_GPU=1) add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1) endif() - if (onnxruntime_USE_OPENVINO_CPU_FP16_NP) - add_definitions(-DOPENVINO_CONFIG_CPU_FP16=1) + if (onnxruntime_USE_OPENVINO_CPU_NP) + add_definitions(-DOPENVINO_CONFIG_CPU=1) add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1) endif() diff --git a/dockerfiles/Dockerfile.openvino b/dockerfiles/Dockerfile.openvino index 049916fac92f1..75898770acf28 100644 --- a/dockerfiles/Dockerfile.openvino +++ b/dockerfiles/Dockerfile.openvino @@ -13,7 +13,7 @@ ENV WORKDIR_PATH=/home/openvino WORKDIR $WORKDIR_PATH ENV DEBIAN_FRONTEND noninteractive -ARG DEVICE=CPU_FP32 +ARG DEVICE=CPU ARG ONNXRUNTIME_REPO=https://github.com/microsoft/onnxruntime.git ARG ONNXRUNTIME_BRANCH=main diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 3252603e33389..db0a33c557353 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -2,14 +2,14 @@ // Licensed under the MIT License #include +#include #include -#include #include "core/providers/shared_library/provider_api.h" -#include "contexts.h" -#include "backend_manager.h" -#include "ibackend.h" -#include "backend_utils.h" +#include "core/providers/openvino/contexts.h" +#include "core/providers/openvino/backend_manager.h" +#include "core/providers/openvino/ibackend.h" +#include "core/providers/openvino/backend_utils.h" namespace onnxruntime { namespace openvino_ep { @@ -21,8 +21,17 @@ GlobalContext& BackendManager::GetGlobalContext() { BackendManager::BackendManager(const GlobalContext& global_context, const onnxruntime::Node& fused_node, const onnxruntime::GraphViewer& subgraph, - const logging::Logger& logger) { + const logging::Logger& logger, + EPCtxHandler& ctx_handle) { global_context_ = global_context; + ep_ctx_handle_ = ctx_handle; + + openvino_sdk_version_ = std::to_string(global_context_.OpenVINO_Version.at(0)) + "." 
+ + std::to_string(global_context_.OpenVINO_Version.at(1)); + if (ep_ctx_handle_.CheckForOVEPCtxNode(subgraph, openvino_sdk_version_)) { + if (ep_ctx_handle_.ImportBlobFromEPCtxModel(subgraph) != Status::OK()) + ORT_THROW("Import blob from model failed"); + } auto prec_str = GetGlobalContext().precision_str; @@ -66,7 +75,8 @@ BackendManager::BackendManager(const GlobalContext& global_context, try { concrete_backend_ = BackendFactory::MakeBackend(*model_proto_, GetGlobalContext(), - subgraph_context_); + subgraph_context_, + ep_ctx_handle_); } catch (std::string const& msg) { ORT_THROW(msg); } @@ -85,7 +95,8 @@ BackendManager::BackendManager(const GlobalContext& global_context, try { concrete_backend_ = BackendFactory::MakeBackend(*model_proto_, GetGlobalContext(), - subgraph_context_); + subgraph_context_, + ep_ctx_handle_); } catch (const OnnxRuntimeException& ex) { if (device_type.find("NPU") != std::string::npos) { LOGS_DEFAULT(WARNING) << ex.what(); @@ -96,7 +107,8 @@ BackendManager::BackendManager(const GlobalContext& global_context, try { concrete_backend_ = BackendFactory::MakeBackend(*model_proto_, GetGlobalContext(), - subgraph_context_); + subgraph_context_, + ep_ctx_handle_); } catch (std::string const& msg) { ORT_THROW(msg); } @@ -107,6 +119,45 @@ BackendManager::BackendManager(const GlobalContext& global_context, } } +// Call EPContext model exporter here if the provider option for exporting +// precompiled blob is set. If that's the case: +// By default, create model in embed mode where the blob stream is exported as data within +// the EPContext node. +Status BackendManager::ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphViewer& graph_body_viewer, + const logging::Logger& logger) { + std::string model_blob_str; + auto compiled_model = concrete_backend_->GetOVCompiledModel(); + auto graph_name = global_context_.onnx_model_path_name; + // Remove extension so we can append suffix to form the complete name of output graph + graph_name = [&]() { + size_t dot = graph_name.find_last_of("."); + if (dot == std::string::npos) return graph_name; + return graph_name.substr(0, dot); + }(); + // If embed_mode, then pass on the serialized blob + // If not embed_mode, dump the blob here and only pass on the path to the blob + if (global_context_.ep_context_embed_mode) { + std::ostringstream model_blob_stream; + compiled_model.export_model(model_blob_stream); + model_blob_str = model_blob_stream.str(); + ORT_ENFORCE(model_blob_str.size() != 0); + } else { + std::ofstream f(graph_name + ".blob", std::ios::out | std::ios::trunc | std::ios::binary); + compiled_model.export_model(f); + model_blob_str = graph_name + ".blob"; + } + + ORT_RETURN_IF_ERROR(ep_ctx_handle_.ExportEPCtxModel(graph_body_viewer, + graph_name, + logger, + global_context_.ep_context_embed_mode, + model_blob_str, + openvino_sdk_version_, + GetGlobalContext().device_type)); + + return Status::OK(); +} + bool BackendManager::ModelHasBatchedInputs(const ONNX_NAMESPACE::ModelProto& model_proto) const { bool has_batched_inputs = true; @@ -182,7 +233,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, return model_proto; } -std::vector> GetInputTensorShapes(Ort::KernelContext& context) { +std::vector> GetInputTensorShapes(const Ort::KernelContext& context) { const auto input_count = context.GetInputCount(); std::vector> input_shapes; input_shapes.reserve(input_count); @@ -289,7 +340,8 @@ void BackendManager::Compute(OrtKernelContext* context) { try { dynamic_backend = 
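The exporter above is driven by the new `export_ep_ctx_blob` provider option: in embed mode (the default) the compiled blob is serialized into the EPContext node, otherwise a `<graph>.blob` file is dumped and only its path is recorded. A hedged sketch of triggering the export from the API, with illustrative file names:

```python
import onnxruntime as ort

# export_ep_ctx_blob asks the EP to wrap the compiled blob in an EPContext
# model during session creation; output follows the
# "<model>-ov_<device>_blob.onnx" pattern used by ExportEPCtxModel.
ov_options = {"device_type": "NPU", "export_ep_ctx_blob": "True"}
ort.InferenceSession("model.onnx",
                     providers=[("OpenVINOExecutionProvider", ov_options)])

# A later run can point at "model-ov_NPU_blob.onnx" directly and skip the
# front-end + compile step (the FEIL/FIL time saving noted in the description).
```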
BackendFactory::MakeBackend(*modelproto_with_concrete_shapes, GetGlobalContext(), - subgraph_context_); + subgraph_context_, + ep_ctx_handle_); } catch (const OnnxRuntimeException& ex) { if (GetGlobalContext().device_type.find("NPU") != std::string::npos) { LOGS_DEFAULT(WARNING) << ex.what(); @@ -301,7 +353,8 @@ void BackendManager::Compute(OrtKernelContext* context) { try { dynamic_backend = BackendFactory::MakeBackend(*modelproto_with_concrete_shapes, GetGlobalContext(), - subgraph_context_); + subgraph_context_, + ep_ctx_handle_); } catch (std::string const& msg) { ORT_THROW(msg); } diff --git a/onnxruntime/core/providers/openvino/backend_manager.h b/onnxruntime/core/providers/openvino/backend_manager.h index 376ebea225a2b..805fd16b09fde 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.h +++ b/onnxruntime/core/providers/openvino/backend_manager.h @@ -8,9 +8,10 @@ #include #include -#include "ov_interface.h" -#include "contexts.h" -#include "ibackend.h" +#include "core/providers/openvino/ov_interface.h" +#include "core/providers/openvino/contexts.h" +#include "core/providers/openvino/onnx_ctx_model_helper.h" +#include "core/providers/openvino/ibackend.h" namespace onnxruntime { namespace openvino_ep { @@ -21,11 +22,14 @@ class BackendManager { BackendManager(const GlobalContext& global_context, const onnxruntime::Node& fused_node, const onnxruntime::GraphViewer& subgraph, - const logging::Logger& logger); + const logging::Logger& logger, + EPCtxHandler& ctx_handle); void Compute(OrtKernelContext* context); void ShutdownBackendManager(); void SetGlobalCotext(const GlobalContext& global_context); GlobalContext& GetGlobalContext(); + Status ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphViewer& subgraph, + const logging::Logger& logger); private: std::unique_ptr GetModelProtoFromFusedNode( @@ -47,6 +51,8 @@ class BackendManager { std::map> backend_map_; SubGraphContext subgraph_context_; GlobalContext global_context_; + EPCtxHandler ep_ctx_handle_{}; + std::string openvino_sdk_version_{}; }; } // namespace openvino_ep diff --git a/onnxruntime/core/providers/openvino/backend_utils.cc b/onnxruntime/core/providers/openvino/backend_utils.cc index 32b5ad7d5b66d..c64f3041a5069 100644 --- a/onnxruntime/core/providers/openvino/backend_utils.cc +++ b/onnxruntime/core/providers/openvino/backend_utils.cc @@ -5,11 +5,11 @@ #include #include -#include "ov_interface.h" #include "openvino/pass/convert_fp32_to_fp16.hpp" #include "openvino/pass/constant_folding.hpp" #include "core/providers/shared_library/provider_api.h" -#include "backend_utils.h" +#include "core/providers/openvino/backend_utils.h" +#include "core/providers/openvino/ov_interface.h" using Exception = ov::Exception; diff --git a/onnxruntime/core/providers/openvino/backend_utils.h b/onnxruntime/core/providers/openvino/backend_utils.h index 93fa874774469..13ecb153b98f2 100644 --- a/onnxruntime/core/providers/openvino/backend_utils.h +++ b/onnxruntime/core/providers/openvino/backend_utils.h @@ -12,8 +12,8 @@ #include #include "core/session/onnxruntime_cxx_api.h" -#include "contexts.h" -#include "ov_interface.h" +#include "core/providers/openvino/contexts.h" +#include "core/providers/openvino/ov_interface.h" #ifdef _WIN32 #include #define GetCurrentDir _getcwd diff --git a/onnxruntime/core/providers/openvino/backends/backend_factory.cc b/onnxruntime/core/providers/openvino/backends/backend_factory.cc index a0f4ce8f843b0..ce7e1c9f7c2b4 100644 --- a/onnxruntime/core/providers/openvino/backends/backend_factory.cc +++ 
b/onnxruntime/core/providers/openvino/backends/backend_factory.cc @@ -5,7 +5,7 @@ #include "core/providers/shared_library/provider_api.h" #include "core/providers/openvino/contexts.h" #include "core/providers/openvino/ibackend.h" -#include "basic_backend.h" +#include "core/providers/openvino/backends/basic_backend.h" namespace onnxruntime { namespace openvino_ep { @@ -13,7 +13,8 @@ namespace openvino_ep { std::shared_ptr BackendFactory::MakeBackend(const ONNX_NAMESPACE::ModelProto& model_proto, GlobalContext& global_context, - const SubGraphContext& subgraph_context) { + const SubGraphContext& subgraph_context, + EPCtxHandler& ep_ctx_handle) { std::string type = global_context.device_type; if (type == "CPU" || type.find("GPU") != std::string::npos || type.find("NPU") != std::string::npos || @@ -22,7 +23,7 @@ BackendFactory::MakeBackend(const ONNX_NAMESPACE::ModelProto& model_proto, type.find("AUTO") != std::string::npos) { std::shared_ptr concrete_backend_; try { - concrete_backend_ = std::make_shared(model_proto, global_context, subgraph_context); + concrete_backend_ = std::make_shared(model_proto, global_context, subgraph_context, ep_ctx_handle); } catch (std::string const& msg) { ORT_THROW(msg); } diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc index 69d234a7c55ef..efaf0ca808a86 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc @@ -9,9 +9,10 @@ #include #include "core/providers/shared_library/provider_api.h" -#include "../backend_utils.h" -#include "basic_backend.h" -#include "../backend_manager.h" +#include "core/providers/openvino/backend_utils.h" +#include "core/providers/openvino/backends/basic_backend.h" +#include "core/providers/openvino/onnx_ctx_model_helper.h" +#include "core/providers/openvino/backend_manager.h" namespace onnxruntime { @@ -21,9 +22,13 @@ using namespace backend_utils; BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto, GlobalContext& global_context, - const SubGraphContext& subgraph_context) + const SubGraphContext& subgraph_context, + EPCtxHandler& ep_ctx_handle) : global_context_(global_context), subgraph_context_(subgraph_context) { - std::string& hw_target = (global_context_.device_id != "") ? 
global_context_.device_id : global_context_.device_type; + std::string& hw_target = global_context_.device_type; + + is_ep_ctx_graph_ = ep_ctx_handle.IsValidOVEPCtxGraph(); + if (ValidateSubgraph(const_outputs_map_)) return; @@ -50,47 +55,62 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto, model_proto.SerializeToOstream(outfile); } #endif + try { std::string dev_prec = global_context.device_type + "_" + global_context_.precision_str; - if (global_context.is_wholly_supported_graph) { + + if (global_context.is_wholly_supported_graph) { // Full graph is supported #if defined(IO_BUFFER_ENABLED) - if ((global_context.device_type.find("GPU") != std::string::npos) && - (global_context_.context != nullptr)) { + if (is_ep_ctx_graph_) { + std::istringstream model_stream(ep_ctx_handle.GetModelBlobString()); + exe_network_ = global_context_.ie_core.ImportModel(model_stream, + remote_context_, + subgraph_context_.subgraph_name); + ie_cnn_network_ = exe_network_.Get().get_runtime_model(); + } else if ((global_context.device_type.find("GPU") != std::string::npos) && + (global_context_.context != nullptr)) { LOGS_DEFAULT(INFO) << log_tag << "IO Buffering Enabled"; cl_context ctx = static_cast(global_context_.context); remote_context_ = new ov::intel_gpu::ocl::ClContext(global_context_.ie_core.Get(), ctx); ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_); - exe_network_ = global_context_.ie_core.LoadNetwork( + exe_network_ = global_context_.ie_core.CompileModel( ie_cnn_network_, remote_context_, subgraph_context_.subgraph_name); - LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin"; + ie_cnn_network_ = exe_network_.Get().get_runtime_model(); } else { ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_); - exe_network_ = global_context_.ie_core.LoadNetwork( + exe_network_ = global_context_.ie_core.CompileModel( ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name); - LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin"; } -#else - if (!subgraph_context_.has_dynamic_input_shape && - global_context_.onnx_model_path_name != "" && - dev_prec != "CPU_FP16") { - exe_network_ = global_context_.ie_core.LoadNetwork(global_context_.onnx_model_path_name, +#else // !IO_BUFFER_ENABLED + if (is_ep_ctx_graph_) { + // If the blob is held in an EPContext node, then skip FE+Compile + // and directly move on to creating a backend with the executable blob + exe_network_ = global_context_.ie_core.ImportModel(ep_ctx_handle.GetModelBlobStream(), hw_target, device_config, subgraph_context_.subgraph_name); - LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin"; - } else { + ie_cnn_network_ = exe_network_.Get().get_runtime_model(); + } else if (!subgraph_context_.has_dynamic_input_shape && + global_context_.onnx_model_path_name.find(".onnx") != std::string ::npos) { + // Inputs with static dimenstions + exe_network_ = global_context_.ie_core.CompileModel(global_context_.onnx_model_path_name, + hw_target, + global_context_.cache_dir, + device_config, + subgraph_context_.subgraph_name); + ie_cnn_network_ = exe_network_.Get().get_runtime_model(); + } else { // Inputs with dynamic dimensions ie_cnn_network_ = CreateOVModel(model_proto, global_context_, const_outputs_map_); - exe_network_ = global_context_.ie_core.LoadNetwork( + exe_network_ = global_context_.ie_core.CompileModel( ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name); - 
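In short, the constructor above now chooses between three paths: importing a pre-compiled blob from an EPContext node, compiling a static-shaped model straight from its .onnx path, or building an ov::Model first. A small Python sketch of that decision, for reference only (not EP source):

```python
def pick_backend_path(is_ep_ctx_graph: bool, has_dynamic_input_shape: bool,
                      onnx_model_path: str) -> str:
    """Sketch of the branch selection above; not the EP source itself."""
    if is_ep_ctx_graph:
        return "import_model"          # pre-compiled blob, FE + compile skipped
    if not has_dynamic_input_shape and ".onnx" in onnx_model_path:
        return "compile_from_path"     # static shapes: pass the .onnx path to OpenVINO
    return "compile_from_ov_model"     # dynamic shapes: CreateOVModel + CompileModel
```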
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin"; } #endif - } else { + } else { // Full graph is not supported ie_cnn_network_ = CreateOVModel(model_proto, global_context_, const_outputs_map_); - exe_network_ = global_context_.ie_core.LoadNetwork( + exe_network_ = global_context_.ie_core.CompileModel( ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name); - LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin"; } + LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin"; } catch (const char* msg) { ORT_THROW(msg); } @@ -111,17 +131,34 @@ bool BasicBackend::ValidateSubgraph(std::map= 2024 && global_context_.OpenVINO_Version.at(1) >= 1) { + device_config.emplace(ov::hint::inference_precision(ov::element::undefined)); + device_config.emplace(ov::hint::execution_mode(ov::hint::ExecutionMode::ACCURACY)); + } else { + if (global_context_.model_precision != "") + device_config.emplace(ov::hint::inference_precision(global_context_.model_precision)); + } + } #ifndef NDEBUG if (openvino_ep::backend_utils::IsDebugEnabled()) { device_config.emplace(ov::enable_profiling(true)); } #endif + + // Set a priority level for the current workload for preemption; default priority is "DEFAULT" + // CPU Plugin doesn't support workload priority + if (global_context_.device_type.find("CPU") == std::string::npos) + device_config.emplace(ov::hint::model_priority(global_context_.model_priority)); + if (global_context_.device_type.find("NPU") != std::string::npos) { std::pair device_property; device_property = std::make_pair("NPU_COMPILER_TYPE", "DRIVER"); @@ -135,9 +172,12 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) { } void BasicBackend::EnableCaching() { + // cache_dir argument has no effect when working with an embed-mode EPContext Graph + if (is_ep_ctx_graph_) return; + if (!global_context_.cache_dir.empty()) { LOGS_DEFAULT(INFO) << log_tag << "Enables Caching"; - global_context_.ie_core.SetCache(global_context_.cache_dir); + global_context_.ie_core.SetCache(global_context_.cache_dir, global_context_.device_type); } } @@ -152,13 +192,19 @@ void BasicBackend::EnableGPUThrottling(ov::AnyMap& device_config) { } void BasicBackend::EnableStreams() { + // Return silently for NPU as it's currently treated as a read-only flag by the NPU plugin + // and throws an exception for the same + if (global_context_.device_type.find("NPU") != std::string::npos) + return; + // Streams can be set only if the device is not one of AUTO, MULTI, or HETERO // Throw an exception if the user tries to set num_streams for these devices if ((global_context_.device_type.find("MULTI") != std::string::npos) || (global_context_.device_type.find("HETERO") != std::string::npos) || (global_context_.device_type.find("AUTO") != std::string::npos)) { if (global_context_.num_streams != 1) { - ORT_THROW(log_tag + "Cannot set NUM_STREAMS to " + std::to_string(global_context_.num_streams) + " for device " + global_context_.device_type); + ORT_THROW(log_tag + "Cannot set NUM_STREAMS to " + + std::to_string(global_context_.num_streams) + " for device " + global_context_.device_type); } // Do nothing } else { @@ -493,8 +539,7 @@ void BasicBackend::Infer(OrtKernelContext* ctx) { #ifndef IO_BUFFER_ENABLED // Printing performance counts is disabled when IO_BUFFER_ENABLED if (openvino_ep::backend_utils::IsDebugEnabled()) { inferRequestsQueue_->printstatus(); // Printing the elements of infer_requests_ vector pool only in debug mode - std::string& hw_target = - (global_context_.device_id != 
"") ? global_context_.device_id : global_context_.device_type; + std::string& hw_target = global_context_.device_type; printPerformanceCounts(infer_request, std::cout, hw_target); } #endif diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.h b/onnxruntime/core/providers/openvino/backends/basic_backend.h index 3502f660bbb20..5565223f067b8 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.h +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.h @@ -25,12 +25,15 @@ class BasicBackend : public IBackend { public: BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto, GlobalContext& global_context, - const SubGraphContext& subgraph_context); + const SubGraphContext& subgraph_context, + EPCtxHandler& ep_ctx_handle); void Infer(OrtKernelContext* context) override; + ov::CompiledModel& GetOVCompiledModel() override { + return exe_network_.Get(); + } private: - bool ImportBlob(std::string hw_target, bool npu_status); void PopulateCompiledDirectory(std::string, std::string&, std::string&, bool&); bool ValidateSubgraph(std::map>& const_outputs_map); void PopulateConfigValue(ov::AnyMap& device_config); @@ -49,10 +52,11 @@ class BasicBackend : public IBackend { GlobalContext& global_context_; SubGraphContext subgraph_context_; mutable std::mutex compute_lock_; - std::shared_ptr ie_cnn_network_; + std::shared_ptr ie_cnn_network_; OVExeNetwork exe_network_; std::map> const_outputs_map_; std::unique_ptr inferRequestsQueue_; + bool is_ep_ctx_graph_{false}; #if defined IO_BUFFER_ENABLED OVRemoteContextPtr remote_context_; #endif diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h index 8701d9f676ffd..6d0a558eeae45 100644 --- a/onnxruntime/core/providers/openvino/contexts.h +++ b/onnxruntime/core/providers/openvino/contexts.h @@ -6,7 +6,7 @@ #include #include #include -#include "ov_interface.h" +#include "core/providers/openvino/ov_interface.h" namespace onnxruntime { namespace openvino_ep { @@ -18,14 +18,16 @@ struct GlobalContext { bool enable_npu_fast_compile = false; bool enable_opencl_throttling = false; bool disable_dynamic_shapes = false; + bool ep_context_embed_mode = true; + bool export_ep_ctx_blob = false; size_t num_of_threads; std::string device_type; std::string precision_str; - std::string device_id; + std::string model_precision; std::string cache_dir; + std::string model_priority = "DEFAULT"; int num_streams; std::vector deviceAvailableList = {true, true, true, true, true, true, true, true}; - std::vector deviceTags = {"0", "1", "2", "3", "4", "5", "6", "7"}; std::string onnx_model_name; std::string onnx_model_path_name; int onnx_opset_version; diff --git a/onnxruntime/core/providers/openvino/ibackend.h b/onnxruntime/core/providers/openvino/ibackend.h index ece855c6167c6..eb0d8e8823896 100644 --- a/onnxruntime/core/providers/openvino/ibackend.h +++ b/onnxruntime/core/providers/openvino/ibackend.h @@ -6,6 +6,7 @@ #include #define ORT_API_MANUAL_INIT #include "core/session/onnxruntime_cxx_api.h" +#include "core/providers/openvino/onnx_ctx_model_helper.h" namespace onnxruntime { namespace openvino_ep { @@ -13,6 +14,7 @@ namespace openvino_ep { class IBackend { public: virtual void Infer(OrtKernelContext* context) = 0; + virtual ov::CompiledModel& GetOVCompiledModel() = 0; }; class BackendFactory { @@ -20,7 +22,8 @@ class BackendFactory { static std::shared_ptr MakeBackend(const ONNX_NAMESPACE::ModelProto& model_proto, GlobalContext& global_context, - const SubGraphContext& 
subgraph_context); + const SubGraphContext& subgraph_context, + EPCtxHandler& ctx_handle); }; } // namespace openvino_ep diff --git a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc new file mode 100644 index 0000000000000..cd1ae6150e1da --- /dev/null +++ b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc @@ -0,0 +1,123 @@ +// Copyright (C) Intel Corporation +// Licensed under the MIT License + +#include +#include +#include + +#include "core/providers/openvino/onnx_ctx_model_helper.h" + +namespace onnxruntime { +namespace openvino_ep { + +/* Export the serialized blob string embedded onto an EPContext Node + * along with other metadata necessary to validate the graph on import + */ + +Status EPCtxHandler::ExportEPCtxModel(const GraphViewer& graph_viewer, + const std::string& graph_name, + const logging::Logger& logger, + const bool& ep_context_embed_mode, + const std::string& model_blob_str, + const std::string& openvino_sdk_version, + const std::string& device_type) const { + auto model_build = graph_viewer.CreateModel(logger); + auto& graph_build = model_build->MainGraph(); + + // Get graph inputs and outputs + std::vector inputs, outputs; + for (auto input : graph_viewer.GetInputs()) { + auto& n_input = graph_build.GetOrCreateNodeArg(input->Name(), input->TypeAsProto()); + inputs.push_back(&n_input); + } + for (auto output : graph_viewer.GetOutputs()) { + auto& n_output = graph_build.GetOrCreateNodeArg(output->Name(), output->TypeAsProto()); + outputs.push_back(&n_output); + } + + // Create EP context node attributes + auto attr_0 = ONNX_NAMESPACE::AttributeProto::Create(); + auto attr_1 = ONNX_NAMESPACE::AttributeProto::Create(); + auto attr_2 = ONNX_NAMESPACE::AttributeProto::Create(); + auto attr_3 = ONNX_NAMESPACE::AttributeProto::Create(); + + // embed mode + attr_0->set_name(EMBED_MODE); + attr_0->set_type(onnx::AttributeProto_AttributeType_INT); + attr_0->set_i(ep_context_embed_mode); + // ep context + attr_1->set_name(EP_CACHE_CONTEXT); + attr_1->set_type(onnx::AttributeProto_AttributeType_STRING); + attr_1->set_s(model_blob_str); + // sdk version + attr_2->set_name(EP_SDK_VER); + attr_2->set_type(onnx::AttributeProto_AttributeType_STRING); + attr_2->set_s(openvino_sdk_version); + // source + attr_3->set_name(SOURCE); + attr_3->set_type(onnx::AttributeProto_AttributeType_STRING); + attr_3->set_s(kOpenVINOExecutionProvider); + + auto node_attributes = ONNX_NAMESPACE::NodeAttributes::Create(); + node_attributes->reserve(4); + node_attributes->emplace(EMBED_MODE, *attr_0); + node_attributes->emplace(EP_CACHE_CONTEXT, *attr_1); + node_attributes->emplace(EP_SDK_VER, *attr_2); + node_attributes->emplace(SOURCE, *attr_3); + + // Create EP context node + graph_build.AddNode(graph_name, EPCONTEXT_OP, "", inputs, outputs, node_attributes.get(), kMSDomain); + ORT_ENFORCE(graph_build.Resolve().IsOK()); + + // Serialize modelproto to string + auto new_graph_viewer = graph_build.CreateGraphViewer(); + auto model = new_graph_viewer->CreateModel(logger); + auto model_proto = model->ToProto(); + new_graph_viewer->ToProto(*model_proto->mutable_graph(), true, true); + model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION); + + // Finally, dump the model + std::ofstream dump(graph_name + "-ov_" + device_type + "_blob.onnx", + std::ios::out | std::ios::trunc | std::ios::binary); + model_proto->SerializeToOstream(dump); + + LOGS_DEFAULT(VERBOSE) << "[OpenVINO EP] Export blob as EPContext Node"; + + 
return Status::OK(); +} + +Status EPCtxHandler::ImportBlobFromEPCtxModel(const GraphViewer& graph_viewer) { + auto node = graph_viewer.GetNode(0); + auto& attrs = node->GetAttributes(); + ORT_ENFORCE(attrs.count(EP_CACHE_CONTEXT) > 0); + + model_stream_ = std::make_shared(attrs.at(EP_CACHE_CONTEXT).s()); + + LOGS_DEFAULT(VERBOSE) << "[OpenVINO EP] Read blob from EPContext Node"; + + is_valid_ep_ctx_graph_ = true; + return Status::OK(); +} + +bool EPCtxHandler::CheckForOVEPCtxNode(const GraphViewer& graph_viewer, std::string openvino_sdk_version) const { + for (int i = 0; i < graph_viewer.MaxNodeIndex(); ++i) { + auto node = graph_viewer.GetNode(i); + auto& attrs = node->GetAttributes(); + + // Check for correct Op Type, EP SOURCE, and SDK version + if (node != nullptr && node->OpType() == EPCONTEXT_OP) { + if (attrs.at(SOURCE).s() == kOpenVINOExecutionProvider) { + if (attrs.at(EP_SDK_VER).s() == openvino_sdk_version) { + return true; + } else { + ORT_THROW("[Invalid Graph] Versions of OpenVINO used to export blob (" + attrs.at(EP_SDK_VER).s() + + ") and current runtime (" + openvino_sdk_version + ") don't match."); + } + } + } + } + return false; +} + +} // namespace openvino_ep +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h new file mode 100644 index 0000000000000..b2b9b5bc53d44 --- /dev/null +++ b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h @@ -0,0 +1,45 @@ +// Copyright (C) Intel Corporation +// Licensed under the MIT License + +#pragma once + +#include +#include +#include + +#include "core/providers/shared_library/provider_api.h" + +namespace onnxruntime { +namespace openvino_ep { + +// Utilities to handle EPContext node export and parsing of an EPContext node +// to create the compiled_model object to infer on +static const char EPCONTEXT_OP[] = "EPContext"; +static const char EMBED_MODE[] = "embed_mode"; +static const char EP_CACHE_CONTEXT[] = "ep_cache_context"; +static const char EP_SDK_VER[] = "ep_sdk_version"; +static const char SOURCE[] = "source"; + +class EPCtxHandler { + public: + EPCtxHandler() = default; + EPCtxHandler(const EPCtxHandler&) = default; + Status ExportEPCtxModel(const GraphViewer& graph_viewer, + const std::string& graph_name, + const logging::Logger& logger, + const bool& ep_context_embed_mode, + const std::string& model_blob_str, + const std::string& openvino_sdk_version, + const std::string& device_type) const; + Status ImportBlobFromEPCtxModel(const GraphViewer& graph_viewer); + bool CheckForOVEPCtxNode(const GraphViewer& graph_viewer, std::string openvino_sdk_version) const; + bool IsValidOVEPCtxGraph() const { return is_valid_ep_ctx_graph_; } + [[nodiscard]] const std::shared_ptr GetModelBlobStream() const { return model_stream_; } + + private: + bool is_valid_ep_ctx_graph_{false}; + std::shared_ptr model_stream_; +}; + +} // namespace openvino_ep +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc index 913440d2fb6ea..656280114c3bd 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc @@ -1,11 +1,13 @@ // Copyright (C) Intel Corporation // Licensed under the MIT License +#include #include "core/providers/shared_library/provider_api.h" -#include "openvino_execution_provider.h" -#include "contexts.h" 
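The attribute names used by ExportEPCtxModel and ImportBlobFromEPCtxModel are the constants declared in onnx_ctx_model_helper.h, so an exported model can be inspected with the onnx Python package. Sketch with an illustrative file name:

```python
import onnx

# Attribute names come from onnx_ctx_model_helper.h; file name is illustrative.
model = onnx.load("model-ov_NPU_blob.onnx")
(node,) = model.graph.node  # GetCapability enforces exactly one EPContext node
assert node.op_type == "EPContext"

attrs = {a.name: a for a in node.attribute}
print(attrs["embed_mode"].i)        # 1 -> blob embedded in ep_cache_context
print(attrs["ep_sdk_version"].s)    # OpenVINO major.minor used at export time
print(attrs["source"].s)            # the exporting EP's name
# attrs["ep_cache_context"].s holds the blob bytes (embed mode) or a file path
```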
-#include "backend_manager.h" -#include "ov_versions/capability.h" +#include "core/providers/openvino/openvino_execution_provider.h" +#include "core/providers/openvino/contexts.h" +#include "core/providers/openvino/backend_manager.h" +#include "core/providers/openvino/onnx_ctx_model_helper.h" +#include "core/providers/openvino/ov_versions/capability.h" #include "openvino/core/version.hpp" #define MEMCPY_S(dest, src, destsz, srcsz) memcpy(dest, src, std::min(destsz, srcsz)) @@ -21,18 +23,19 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv global_context_->precision_str = info.precision_; global_context_->enable_npu_fast_compile = info.enable_npu_fast_compile_; global_context_->cache_dir = info.cache_dir_; + global_context_->model_priority = info.model_priority_; global_context_->num_streams = info.num_streams_; global_context_->context = info.context_; global_context_->enable_opencl_throttling = info.enable_opencl_throttling_; global_context_->disable_dynamic_shapes = info.disable_dynamic_shapes_; global_context_->num_of_threads = info.num_of_threads_; global_context_->OpenVINO_Version = {OPENVINO_VERSION_MAJOR, OPENVINO_VERSION_MINOR}; + global_context_->export_ep_ctx_blob = info.export_ep_ctx_blob_; // to check if target device is available // using ie_core capability GetAvailableDevices to fetch list of devices plugged in if (info.cache_dir_.empty()) { bool device_found = false; - bool device_id_found = false; auto available_devices = global_context_->ie_core.GetAvailableDevices(); // Checking for device_type configuration if (info.device_type_ != "") { @@ -40,15 +43,16 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv info.device_type_.find("MULTI") != std::string::npos || info.device_type_.find("AUTO") != std::string::npos) { device_found = true; - } else if (info.device_type_ == "CPU" || info.device_type_.find("GPU") != std::string::npos) { + } else { for (auto device : available_devices) { if (device.rfind(info.device_type_, 0) == 0) { if (info.device_type_.find("GPU") != std::string::npos && (info.precision_ == "FP32" || - info.precision_ == "FP16")) { + info.precision_ == "FP16" || + info.precision_ == "ACCURACY")) { device_found = true; break; } - if (info.device_type_ == "CPU" && (info.precision_ == "FP32" || info.precision_ == "FP16")) { + if (info.device_type_ == "CPU" && (info.precision_ == "FP32")) { device_found = true; break; } @@ -58,51 +62,31 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv } } } - } else { - device_found = true; } } if (!device_found) { - std::string err_msg = std::string("Device Type not found : ") + info.device_type_ + - "\nChoose the right precision with one of:\n"; - for (auto device : available_devices) { - err_msg = err_msg + device + "\n"; - } - ORT_THROW(err_msg); - } - // Checking for device_id configuration - if (info.device_id_ != "") { - for (auto device : available_devices) { - if (device.rfind(info.device_id_, 0) == 0) { - if (info.device_id_ == "CPU" || info.device_id_ == "GPU") { - LOGS_DEFAULT(INFO) << "[OpenVINO-EP]" - << "Switching to Device ID: " << info.device_id_; - device_id_found = true; - break; - } - } - } - if (!device_id_found) { - std::string err_msg = std::string("Device ID not found : ") + info.device_id_ + "\nChoose one of:\n"; - for (auto device : available_devices) { - err_msg = err_msg + device + "\n"; - } - ORT_THROW(err_msg); - } + ORT_THROW("[ERROR] [OpenVINO] Specified device - " + info.device_type_ + " is not 
available"); } } - global_context_->device_id = info.device_id_; } std::vector> OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer, const IKernelLookup& /*kernel_lookup*/) const { std::vector> result; + + std::string openvino_sdk_version = std::to_string(global_context_->OpenVINO_Version.at(0)) + "." + + std::to_string(global_context_->OpenVINO_Version.at(1)); + + // Check for valid ctx node and maintain state for validity + if (ep_ctx_handle_.CheckForOVEPCtxNode(graph_viewer, openvino_sdk_version)) + ORT_ENFORCE(graph_viewer.NumberOfNodes() == 1, + "[Invalid Graph] EPContext Model with OpenVINO compiled blob should not have more than one node."); + // Enable CI Logs if (!(GetEnvironmentVar("ORT_OPENVINO_ENABLE_CI_LOG").empty())) { std::cout << "In the OpenVINO EP" << std::endl; } - global_context_->onnx_model_name = graph_viewer.Name(); #ifdef _WIN32 std::wstring onnx_path = graph_viewer.ModelPath().ToPathString(); global_context_->onnx_model_path_name = @@ -114,9 +98,26 @@ OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer, global_context_->onnx_opset_version = graph_viewer.DomainToVersionMap().at(kOnnxDomain); + global_context_->model_precision = [&](const GraphViewer& graph_viewer) { + // return empty if graph has no inputs or if types are not one of FP32/FP16 + // else assume the type of the first input + if (graph_viewer.GetInputs().empty()) { + return ""; + } else { + auto input_type = graph_viewer.GetInputs()[0]->TypeAsProto()->tensor_type().elem_type(); + if (global_context_->precision_str == "ACCURACY" && global_context_->device_type == "GPU") { + if (input_type == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT) { + return "FP32"; + } else if (input_type == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16) { + return "FP16"; + } + } + } + return ""; + }(graph_viewer); + openvino_ep::GetCapability obj(graph_viewer, - global_context_->device_type, - global_context_->precision_str); + global_context_->device_type); result = obj.Execute(); global_context_->is_wholly_supported_graph = obj.IsWhollySupportedGraph(); @@ -135,8 +136,21 @@ common::Status OpenVINOExecutionProvider::Compile( global_context_->use_api_2 = true; + // During backend creation, we check if user wants to use precompiled blob onnx model or the original model + // For precompiled blob, directly load the model instead of compiling the model + // For original model, check if the user wants to export a model with pre-compiled blob + std::shared_ptr backend_manager = - std::make_shared(*global_context_, fused_node, graph_body_viewer, *GetLogger()); + std::make_shared(*global_context_, + fused_node, + graph_body_viewer, + *GetLogger(), + ep_ctx_handle_); + + if (global_context_->export_ep_ctx_blob && !ep_ctx_handle_.IsValidOVEPCtxGraph()) { + ORT_RETURN_IF_ERROR(backend_manager->ExportCompiledBlobAsEPCtxNode(graph_body_viewer, + *GetLogger())); + } compute_info.create_state_func = [backend_manager](ComputeContext* context, FunctionState* state) { diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.h b/onnxruntime/core/providers/openvino/openvino_execution_provider.h index b0dc881c36f33..75ffb807fe925 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.h +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.h @@ -9,8 +9,9 @@ #include #include #include +#include -#include "backend_manager.h" +#include "core/providers/openvino/backend_manager.h" namespace onnxruntime { @@ -60,52 
+61,54 @@ static std::vector parseDevices(const std::string& device_string) { // Information needed to construct OpenVINO execution providers. struct OpenVINOExecutionProviderInfo { - std::string device_type_; - std::string precision_; - bool enable_npu_fast_compile_; - std::string device_id_; - size_t num_of_threads_; - std::string cache_dir_; - int num_streams_; - void* context_; - bool enable_opencl_throttling_; - bool disable_dynamic_shapes_; - - explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_npu_fast_compile, std::string dev_id, - size_t num_of_threads, std::string cache_dir, int num_streams, - void* context, bool enable_opencl_throttling, - bool disable_dynamic_shapes) - : enable_npu_fast_compile_(enable_npu_fast_compile), - device_id_(dev_id), + std::string device_type_{""}; + std::string precision_{""}; + bool enable_npu_fast_compile_{false}; + size_t num_of_threads_{0}; + std::string cache_dir_{""}; + std::string model_priority_{""}; + int num_streams_{1}; + void* context_{NULL}; + bool enable_opencl_throttling_{false}; + bool disable_dynamic_shapes_{false}; + bool export_ep_ctx_blob_{false}; + + OpenVINOExecutionProviderInfo() = delete; + + explicit OpenVINOExecutionProviderInfo(std::string dev_type, std::string precision, bool enable_npu_fast_compile, + size_t num_of_threads, std::string cache_dir, std::string model_priority, + int num_streams, void* context, bool enable_opencl_throttling, + bool disable_dynamic_shapes, bool export_ep_ctx_blob) + : precision_(precision), + enable_npu_fast_compile_(enable_npu_fast_compile), num_of_threads_(num_of_threads), cache_dir_(cache_dir), + model_priority_(model_priority), num_streams_(num_streams), context_(context), enable_opencl_throttling_(enable_opencl_throttling), - disable_dynamic_shapes_(disable_dynamic_shapes) { + disable_dynamic_shapes_(disable_dynamic_shapes), + export_ep_ctx_blob_(export_ep_ctx_blob) { + std::set ov_supported_device_types = {"CPU", "GPU", + "GPU.0", "GPU.1", "NPU"}; if (dev_type == "") { LOGS_DEFAULT(INFO) << "[OpenVINO-EP]" << "No runtime device selection option provided."; -#if defined OPENVINO_CONFIG_CPU_FP32 +#if defined OPENVINO_CONFIG_CPU device_type_ = "CPU"; precision_ = "FP32"; -#elif defined OPENVINO_CONFIG_CPU_FP16 - device_type_ = "CPU"; - precision_ = "FP16"; -#elif defined OPENVINO_CONFIG_GPU_FP32 - device_type_ = "GPU"; - precision_ = "FP32"; -#elif defined OPENVINO_CONFIG_GPU_FP16 +#elif defined OPENVINO_CONFIG_GPU device_type_ = "GPU"; precision_ = "FP16"; #elif defined OPENVINO_CONFIG_NPU device_type_ = "NPU"; - precision_ = ""; + precision_ = "FP16"; #elif defined OPENVINO_CONFIG_HETERO || defined OPENVINO_CONFIG_MULTI || defined OPENVINO_CONFIG_AUTO #ifdef DEVICE_NAME #define DEVICE DEVICE_NAME #endif dev_type = DEVICE; + if (dev_type.find("HETERO") == 0 || dev_type.find("MULTI") == 0 || dev_type.find("AUTO") == 0) { std::vector devices = parseDevices(dev_type); precision_ = "FP16"; @@ -115,33 +118,8 @@ struct OpenVINOExecutionProviderInfo { device_type_ = dev_type; } #endif - } else if (dev_type == "CPU_FP32") { - device_type_ = "CPU"; - precision_ = "FP32"; - } else if (dev_type == "CPU_FP16") { - device_type_ = "CPU"; - precision_ = "FP16"; - } else if (dev_type == "GPU_FP32") { - device_type_ = "GPU"; - precision_ = "FP32"; - } else if (dev_type == "GPU.0_FP32") { - device_type_ = "GPU.0"; - precision_ = "FP32"; - } else if (dev_type == "GPU.1_FP32") { - device_type_ = "GPU.1"; - precision_ = "FP32"; - } else if (dev_type == "GPU_FP16") { - device_type_ = "GPU"; 
- precision_ = "FP16"; - } else if (dev_type == "GPU.0_FP16") { - device_type_ = "GPU.0"; - precision_ = "FP16"; - } else if (dev_type == "GPU.1_FP16") { - device_type_ = "GPU.1"; - precision_ = "FP16"; - } else if (dev_type == "NPU") { - device_type_ = "NPU"; - precision_ = ""; + } else if (ov_supported_device_types.find(dev_type) != ov_supported_device_types.end()) { + device_type_ = dev_type; } else if (dev_type.find("HETERO") == 0 || dev_type.find("MULTI") == 0) { std::vector devices = parseDevices(dev_type); precision_ = "FP16"; @@ -159,9 +137,6 @@ struct OpenVINOExecutionProviderInfo { LOGS_DEFAULT(INFO) << "[OpenVINO-EP]" << "Choosing Device: " << device_type_ << " , Precision: " << precision_; } - OpenVINOExecutionProviderInfo() { - OpenVINOExecutionProviderInfo("", false, "", 0, "", 1, NULL, false, false); - } }; struct OpenVINOEPFunctionState { @@ -190,6 +165,7 @@ class OpenVINOExecutionProvider : public IExecutionProvider { private: std::unique_ptr global_context_; + openvino_ep::EPCtxHandler ep_ctx_handle_{}; }; } // namespace onnxruntime diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc index 17511c54aab86..0ba1f50cad54f 100644 --- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc +++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc @@ -8,18 +8,22 @@ namespace onnxruntime { struct OpenVINOProviderFactory : IExecutionProviderFactory { - OpenVINOProviderFactory(const char* device_type, bool enable_npu_fast_compile, - const char* device_id, size_t num_of_threads, - const char* cache_dir, int num_streams, void* context, - bool enable_opencl_throttling, bool disable_dynamic_shapes) - : enable_npu_fast_compile_(enable_npu_fast_compile), + OpenVINOProviderFactory(const char* device_type, const char* precision, + bool enable_npu_fast_compile, size_t num_of_threads, + const char* cache_dir, const char* model_priority, + int num_streams, void* context, + bool enable_opencl_throttling, bool disable_dynamic_shapes, + bool export_ep_ctx_blob) + : precision_(precision), + enable_npu_fast_compile_(enable_npu_fast_compile), num_of_threads_(num_of_threads), + model_priority_(model_priority), num_streams_(num_streams), context_(context), enable_opencl_throttling_(enable_opencl_throttling), - disable_dynamic_shapes_(disable_dynamic_shapes) { + disable_dynamic_shapes_(disable_dynamic_shapes), + export_ep_ctx_blob_(export_ep_ctx_blob) { device_type_ = (device_type == nullptr) ? "" : device_type; - device_id_ = (device_id == nullptr) ? "" : device_id; cache_dir_ = (cache_dir == nullptr) ? 
"" : cache_dir; } ~OpenVINOProviderFactory() override { @@ -29,20 +33,22 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory { private: std::string device_type_; + std::string precision_; bool enable_npu_fast_compile_; - std::string device_id_; size_t num_of_threads_; std::string cache_dir_; + std::string model_priority_; int num_streams_; void* context_; bool enable_opencl_throttling_; bool disable_dynamic_shapes_; + bool export_ep_ctx_blob_; }; std::unique_ptr OpenVINOProviderFactory::CreateProvider() { - OpenVINOExecutionProviderInfo info(device_type_, enable_npu_fast_compile_, device_id_, num_of_threads_, - cache_dir_, num_streams_, context_, enable_opencl_throttling_, - disable_dynamic_shapes_); + OpenVINOExecutionProviderInfo info(device_type_, precision_, enable_npu_fast_compile_, num_of_threads_, + cache_dir_, model_priority_, num_streams_, context_, enable_opencl_throttling_, + disable_dynamic_shapes_, export_ep_ctx_blob_); return std::make_unique(info); } @@ -62,44 +68,94 @@ struct OpenVINO_Provider : Provider { std::shared_ptr CreateExecutionProviderFactory(const void* void_params) override { auto& provider_options_map = *reinterpret_cast(void_params); - std::string device_type = ""; // [device_type]: Overrides the accelerator hardware type and precision - // with these values at runtime. - bool enable_npu_fast_compile = false; // [enable_npu_fast_compile]: Fast-compile may be optionally enabled to - // speeds up the model's compilation to NPU device specific format. - const char* device_id = ""; // [device_id]: Selects a particular hardware device for inference. - int num_of_threads = 0; // [num_of_threads]: Overrides the accelerator default value of number of - // threads with this value at runtime. - const char* cache_dir = ""; // [cache_dir]: specify the path to - // dump and load the blobs for the model caching/kernel caching (GPU) - // feature. If blob files are already present, it will be directly loaded. - int num_streams = 1; // [num_streams]: Option that specifies the number of parallel inference - // requests to be processed on a given `device_type`. Overrides the - // accelerator default value of number of streams - // with this value at runtime. - bool enable_opencl_throttling = false; // [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU - // device (Reduces CPU Utilization when using GPU) + std::string device_type = ""; // [device_type]: Overrides the accelerator hardware type and precision + // with these values at runtime. + std::string precision = ""; // [precision]: Sets the inference precision for execution. + // Supported precision for devices are CPU=FP32, GPU=FP32,FP16, NPU=FP16. + // Not setting precision will execute with optimized precision for + // best inference latency. set Precision=ACCURACY for executing models + // with input precision for best accuracy. + bool enable_npu_fast_compile = false; // [enable_npu_fast_compile]: Fast-compile may be optionally enabled to + // speeds up the model's compilation to NPU device specific format. + int num_of_threads = 0; // [num_of_threads]: Overrides the accelerator default value of number of + // threads with this value at runtime. + const char* cache_dir = ""; // [cache_dir]: specify the path to + // dump and load the blobs for the model caching/kernel caching (GPU) + // feature. If blob files are already present, it will be directly loaded. 
+ const char* model_priority = "DEFAULT"; // High-level OpenVINO model priority hint + // Defines what model should be provided with more performant + // bounded resource first + int num_streams = 1; // [num_streams]: Option that specifies the number of parallel inference + // requests to be processed on a given `device_type`. Overrides the + // accelerator default value of number of streams + // with this value at runtime. + bool enable_opencl_throttling = false; // [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU + // device (Reduces CPU Utilization when using GPU) + bool export_ep_ctx_blob = false; // Whether to export the pre-compiled blob as an EPContext model. + void* context = nullptr; if (provider_options_map.find("device_type") != provider_options_map.end()) { device_type = provider_options_map.at("device_type").c_str(); - std::set ov_supported_device_types = {"CPU_FP32", "CPU_FP16", "GPU_FP32", - "GPU.0_FP32", "GPU.1_FP32", "GPU_FP16", - "GPU.0_FP16", "GPU.1_FP16", "NPU"}; + std::set ov_supported_device_types = {"CPU", "GPU", + "GPU.0", "GPU.1", "NPU"}; + std::set deprecated_device_types = {"CPU_FP32", "GPU_FP32", + "GPU.0_FP32", "GPU.1_FP32", "GPU_FP16", + "GPU.0_FP16", "GPU.1_FP16"}; + if (deprecated_device_types.find(device_type) != deprecated_device_types.end()) { + std::string deprecated_device = device_type; + int delimit = device_type.find("_"); + device_type = deprecated_device.substr(0, delimit); + precision = deprecated_device.substr(delimit + 1); + LOGS_DEFAULT(WARNING) << "[OpenVINO] Selected 'device_type' " + deprecated_device + " is deprecated. \n" + << "Update the 'device_type' to specified types 'CPU', 'GPU', 'GPU.0', " + << "'GPU.1', 'NPU' or from" + << " HETERO/MULTI/AUTO options and set 'precision' separately. \n"; + } if (!((ov_supported_device_types.find(device_type) != ov_supported_device_types.end()) || (device_type.find("HETERO:") == 0) || (device_type.find("MULTI:") == 0) || (device_type.find("AUTO:") == 0))) { ORT_THROW( "[ERROR] [OpenVINO] You have selcted wrong configuration value for the key 'device_type'. " - "Select from 'CPU_FP32', 'CPU_FP16', 'GPU_FP32', 'GPU.0_FP32', 'GPU.1_FP32', 'GPU_FP16', " - "'GPU.0_FP16', 'GPU.1_FP16', 'NPU' or from" + "Select from 'CPU', 'GPU', 'GPU.0', 'GPU.1', 'NPU' or from" " HETERO/MULTI/AUTO options available. \n"); } } if (provider_options_map.find("device_id") != provider_options_map.end()) { - device_id = provider_options_map.at("device_id").c_str(); + std::string dev_id = provider_options_map.at("device_id").c_str(); + LOGS_DEFAULT(WARNING) << "[OpenVINO] The options 'device_id' is deprecated. " + << "Upgrade to set deice_type and precision session options.\n"; + if (dev_id == "CPU" || dev_id == "GPU" || dev_id == "NPU") { + device_type = dev_id; + } else { + ORT_THROW("[ERROR] [OpenVINO] Unsupported device_id is selected. Select from available options."); + } } + if (provider_options_map.find("precision") != provider_options_map.end()) { + precision = provider_options_map.at("precision").c_str(); + } + if (device_type == "CPU") { + if (precision == "" || precision == "ACCURACY" || precision == "FP32") { + precision = "FP32"; + } else { + ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. CPU only supports FP32 . \n"); + } + } else if (device_type == "NPU") { + if (precision == "" || precision == "ACCURACY" || precision == "FP16") { + precision = "FP16"; + } else { + ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. NPU only supported FP16. 
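The precision handling here reduces to a small table: CPU accepts only FP32, NPU only FP16, and GPU accepts FP32/FP16/ACCURACY with FP16 as the default. A Python sketch of that resolution (a mirror of the checks, not the EP code):

```python
def resolve_precision(device_type: str, precision: str = "") -> str:
    """Sketch mirroring the validation in this change; not the EP source."""
    if device_type == "CPU":
        if precision in ("", "ACCURACY", "FP32"):
            return "FP32"
        raise ValueError("CPU only supports FP32")
    if device_type == "NPU":
        if precision in ("", "ACCURACY", "FP16"):
            return "FP16"
        raise ValueError("NPU only supports FP16")
    if device_type == "GPU":
        if precision == "":
            return "FP16"
        if precision in ("FP32", "FP16", "ACCURACY"):
            return precision
        raise ValueError("GPU only supports FP32 / FP16 / ACCURACY")
    return precision  # HETERO/MULTI/AUTO and GPU.x handled elsewhere

assert resolve_precision("CPU") == "FP32"
assert resolve_precision("GPU", "ACCURACY") == "ACCURACY"
```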
\n"); + } + } else if (device_type == "GPU") { + if (precision == "") { + precision = "FP16"; + } else if (precision != "ACCURACY" && precision != "FP16" && precision != "FP32") { + ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. GPU only supports FP32 / FP16. \n"); + } + } + if (provider_options_map.find("cache_dir") != provider_options_map.end()) { cache_dir = provider_options_map.at("cache_dir").c_str(); } @@ -119,6 +175,18 @@ struct OpenVINO_Provider : Provider { } } + if (provider_options_map.find("model_priority") != provider_options_map.end()) { + model_priority = provider_options_map.at("model_priority").c_str(); + std::vector supported_priorities({"LOW", "MEDIUM", "HIGH", "DEFAULT"}); + if (std::find(supported_priorities.begin(), supported_priorities.end(), + model_priority) == supported_priorities.end()) { + model_priority = "DEFAULT"; + LOGS_DEFAULT(WARNING) << "[OpenVINO-EP] The value for the key 'model_priority' " + << "is not one of LOW, MEDIUM, HIGH, DEFAULT. " + << "Executing with model_priorty=DEFAULT"; + } + } + if (provider_options_map.find("num_streams") != provider_options_map.end()) { num_streams = std::stoi(provider_options_map.at("num_streams")); if (num_streams <= 0) { @@ -154,26 +222,38 @@ struct OpenVINO_Provider : Provider { } if (provider_options_map.find("disable_dynamic_shapes") != provider_options_map.end()) { bool_flag = provider_options_map.at("disable_dynamic_shapes"); - if (bool_flag == "true" || bool_flag == "True") + if (bool_flag == "true" || bool_flag == "True") { disable_dynamic_shapes = true; - else if (bool_flag == "false" || bool_flag == "False") { + } else if (bool_flag == "false" || bool_flag == "False") { if (device_type.find("NPU") != std::string::npos) { disable_dynamic_shapes = true; - LOGS_DEFAULT(INFO) << "[OpenVINO-EP] The value for the key 'disable_dynamic_shapes' will be set to TRUE for NPU backend.\n "; + LOGS_DEFAULT(INFO) << "[OpenVINO-EP] The value for the key 'disable_dynamic_shapes' will be set to " + << "TRUE for NPU backend.\n "; } else { disable_dynamic_shapes = false; } } } + + if (provider_options_map.find("export_ep_ctx_blob") != provider_options_map.end()) { + bool_flag = provider_options_map.at("export_ep_ctx_blob"); + if (bool_flag == "true" || bool_flag == "True") + export_ep_ctx_blob = true; + else if (bool_flag == "false" || bool_flag == "False") + export_ep_ctx_blob = false; + bool_flag = ""; + } return std::make_shared(const_cast(device_type.c_str()), + const_cast(precision.c_str()), enable_npu_fast_compile, - device_id, num_of_threads, cache_dir, + model_priority, num_streams, context, enable_opencl_throttling, - disable_dynamic_shapes); + disable_dynamic_shapes, + export_ep_ctx_blob); } void Initialize() override { diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc index d7c6654c90f81..1ada1e1cc9d17 100644 --- a/onnxruntime/core/providers/openvino/ov_interface.cc +++ b/onnxruntime/core/providers/openvino/ov_interface.cc @@ -1,12 +1,12 @@ // Copyright (C) Intel Corporation // Licensed under the MIT License -#include "ov_interface.h" -#include +#include "core/providers/openvino/ov_interface.h" + #define ORT_API_MANUAL_INIT #include "core/session/onnxruntime_cxx_api.h" #include "core/providers/shared_library/provider_api.h" -#include "backend_utils.h" +#include "core/providers/openvino/backend_utils.h" using Exception = ov::Exception; @@ -14,6 +14,38 @@ namespace onnxruntime { namespace openvino_ep { const std::string 
log_tag = "[OpenVINO-EP] "; + +#ifndef NDEBUG +void printDebugInfo(const ov::CompiledModel& obj) { + if (onnxruntime::openvino_ep::backend_utils::IsDebugEnabled()) { + // output of the actual settings that the device selected + auto supported_properties = obj.get_property(ov::supported_properties); + std::cout << "Model:" << std::endl; + for (const auto& cfg : supported_properties) { + if (cfg == ov::supported_properties) + continue; + auto prop = obj.get_property(cfg); + if (cfg == ov::device::properties) { + auto devices_properties = prop.as(); + for (auto& item : devices_properties) { + std::cout << " " << item.first << ": " << std::endl; + for (auto& item2 : item.second.as()) { + OPENVINO_SUPPRESS_DEPRECATED_START + if (item2.first == ov::supported_properties || item2.first == "SUPPORTED_CONFIG_KEYS)" || + item2.first == "SUPPORTED_METRICS") + continue; + OPENVINO_SUPPRESS_DEPRECATED_END + std::cout << " " << item2.first << ": " << item2.second.as() << std::endl; + } + } + } else { + std::cout << " " << cfg << ": " << prop.as() << std::endl; + } + } + } +} +#endif + std::shared_ptr OVCore::ReadModel(const std::string& model, const std::string& model_path) const { try { std::istringstream modelStringStream(model); @@ -37,41 +69,42 @@ std::shared_ptr OVCore::ReadModel(const std::string& model, const std } } -OVExeNetwork OVCore::LoadNetwork(std::shared_ptr& ie_cnn_network, - std::string& hw_target, - ov::AnyMap& device_config, - std::string name) { +OVExeNetwork OVCore::CompileModel(std::shared_ptr& ie_cnn_network, + std::string& hw_target, + ov::AnyMap& device_config, + std::string name) { ov::CompiledModel obj; try { obj = oe.compile_model(ie_cnn_network, hw_target, device_config); - #ifndef NDEBUG - if (onnxruntime::openvino_ep::backend_utils::IsDebugEnabled()) { - // output of the actual settings that the device selected - auto supported_properties = obj.get_property(ov::supported_properties); - std::cout << "Model:" << std::endl; - for (const auto& cfg : supported_properties) { - if (cfg == ov::supported_properties) - continue; - auto prop = obj.get_property(cfg); - if (cfg == ov::device::properties) { - auto devices_properties = prop.as(); - for (auto& item : devices_properties) { - std::cout << " " << item.first << ": " << std::endl; - for (auto& item2 : item.second.as()) { - OPENVINO_SUPPRESS_DEPRECATED_START - if (item2.first == ov::supported_properties || item2.first == "SUPPORTED_CONFIG_KEYS)" || - item2.first == "SUPPORTED_METRICS") - continue; - OPENVINO_SUPPRESS_DEPRECATED_END - std::cout << " " << item2.first << ": " << item2.second.as() << std::endl; - } - } - } else { - std::cout << " " << cfg << ": " << prop.as() << std::endl; - } - } + printDebugInfo(obj); +#endif + OVExeNetwork exe(obj); + return exe; + } catch (const Exception& e) { + ORT_THROW(log_tag + " Exception while Loading Network for graph: " + name + e.what()); + } catch (...) 
{ + ORT_THROW(log_tag + " Exception while Loading Network for graph " + name); + } +} + +OVExeNetwork OVCore::CompileModel(const std::string onnx_model_path, + std::string& hw_target, + std::string cache_dir, + ov::AnyMap& device_config, + std::string name) { + ov::CompiledModel obj; + try { + if (hw_target == "AUTO:GPU,CPU") { + obj = oe.compile_model(onnx_model_path, + "AUTO", + ov::device::priorities("GPU", "CPU"), + ov::device::properties("GPU", ov::cache_dir(cache_dir))); + } else { + obj = oe.compile_model(onnx_model_path, hw_target, device_config); } +#ifndef NDEBUG + printDebugInfo(obj); #endif OVExeNetwork exe(obj); return exe; @@ -82,13 +115,15 @@ OVExeNetwork OVCore::LoadNetwork(std::shared_ptr& ie_cnn_network, } } -OVExeNetwork OVCore::LoadNetwork(const std::string onnx_model_path, +OVExeNetwork OVCore::ImportModel(std::shared_ptr model_stream, std::string& hw_target, ov::AnyMap& device_config, std::string name) { - ov::CompiledModel obj; try { - obj = oe.compile_model(onnx_model_path, hw_target, device_config); + auto obj = oe.import_model(*model_stream, hw_target, device_config); +#ifndef NDEBUG + printDebugInfo(obj); +#endif OVExeNetwork exe(obj); return exe; } catch (const Exception& e) { @@ -98,14 +133,20 @@ OVExeNetwork OVCore::LoadNetwork(const std::string onnx_model_path, } } -void OVCore::SetCache(std::string cache_dir_path) { - oe.set_property(ov::cache_dir(cache_dir_path)); +void OVCore::SetCache(std::string cache_dir_path, std::string device_type) { + if (device_type == "AUTO:GPU,CPU") { + oe.set_property(ov::cache_dir(cache_dir_path)); + } } #ifdef IO_BUFFER_ENABLED -OVExeNetwork OVCore::LoadNetwork(std::shared_ptr& model, OVRemoteContextPtr context, std::string& name) { +OVExeNetwork OVCore::CompileModel(std::shared_ptr& model, + OVRemoteContextPtr context, std::string& name) { try { auto obj = oe.compile_model(model, *context); +#ifndef NDEBUG + printDebugInfo(obj); +#endif return OVExeNetwork(obj); } catch (const Exception& e) { ORT_THROW(log_tag + " Exception while Loading Network for graph: " + name + e.what()); @@ -113,6 +154,21 @@ OVExeNetwork OVCore::LoadNetwork(std::shared_ptr& model, OVRemoteCont ORT_THROW(log_tag + " Exception while Loading Network for graph " + name); } } +OVExeNetwork OVCore::ImportModel(std::shared_ptr model_stream, + OVRemoteContextPtr context, std::string& name) { + try { + auto obj = oe.import_model(*model_stream, *context); +#ifndef NDEBUG + printDebugInfo(obj); +#endif + OVExeNetwork exe(obj); + return exe; + } catch (const Exception& e) { + ORT_THROW(log_tag + " Exception while Loading Network for graph: " + name + e.what()); + } catch (...) 
{ + ORT_THROW(log_tag + " Exception while Loading Network for graph " + name); + } +} #endif std::vector OVCore::GetAvailableDevices() { diff --git a/onnxruntime/core/providers/openvino/ov_interface.h b/onnxruntime/core/providers/openvino/ov_interface.h index 2a13fafb99fd3..f61d3608574da 100644 --- a/onnxruntime/core/providers/openvino/ov_interface.h +++ b/onnxruntime/core/providers/openvino/ov_interface.h @@ -5,6 +5,8 @@ #include #include +#include +#include #include "openvino/openvino.hpp" #include "openvino/pass/convert_fp32_to_fp16.hpp" @@ -38,22 +40,26 @@ class OVCore { public: std::shared_ptr ReadModel(const std::string& model_stream, const std::string& model_path) const; - OVExeNetwork LoadNetwork(std::shared_ptr& ie_cnn_network, + OVExeNetwork CompileModel(std::shared_ptr& ie_cnn_network, + std::string& hw_target, + ov::AnyMap& device_config, + std::string name); + OVExeNetwork CompileModel(const std::string onnx_model_path, + std::string& hw_target, + std::string cache_dir, + ov::AnyMap& device_config, + std::string name); + OVExeNetwork ImportModel(std::shared_ptr model_stream, std::string& hw_target, ov::AnyMap& device_config, std::string name); - OVExeNetwork LoadNetwork(const std::string model_path, - std::string& hw_target, - ov::AnyMap& device_config, - std::string name); - void SetCache(std::string cache_dir_path); #ifdef IO_BUFFER_ENABLED - OVExeNetwork LoadNetwork(std::shared_ptr& model, OVRemoteContextPtr context, std::string& name); + OVExeNetwork CompileModel(std::shared_ptr& model, OVRemoteContextPtr context, std::string& name); + OVExeNetwork ImportModel(std::shared_ptr model_stream, OVRemoteContextPtr context, std::string& name); #endif std::vector GetAvailableDevices(); - ov::Core& Get() { - return oe; - } + void SetCache(std::string cache_dir_path, std::string device_type); + ov::Core& Get() { return oe; } void SetStreams(const std::string& device_type, int num_streams); }; @@ -61,8 +67,8 @@ class OVExeNetwork { ov::CompiledModel obj; public: - explicit OVExeNetwork(ov::CompiledModel md) { obj = md; } - OVExeNetwork() { obj = ov::CompiledModel(); } + explicit OVExeNetwork(ov::CompiledModel md) : obj(md) {} + OVExeNetwork() : obj(ov::CompiledModel()) {} ov::CompiledModel& Get() { return obj; } OVInferRequest CreateInferRequest(); }; @@ -77,8 +83,8 @@ class OVInferRequest { void Infer(); void WaitRequest(); void QueryStatus(); - explicit OVInferRequest(ov::InferRequest obj) { ovInfReq = obj; } - OVInferRequest() { ovInfReq = ov::InferRequest(); } + explicit OVInferRequest(ov::InferRequest obj) : ovInfReq(obj) {} + OVInferRequest() : ovInfReq(ov::InferRequest()) {} ov::InferRequest& GetNewObj() { return ovInfReq; } diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability.cc b/onnxruntime/core/providers/openvino/ov_versions/capability.cc index 3970bf6ff68a7..714d5b03baae3 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/capability.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/capability.cc @@ -1,11 +1,13 @@ // Copyright (C) 2019- Intel Corporation // Licensed under the MIT License +#include +#include #include "core/providers/shared_library/provider_api.h" -#include "../backend_utils.h" -#include "../backend_manager.h" -#include "capability.h" -#include "utils.h" +#include "core/providers/openvino/backend_utils.h" +#include "core/providers/openvino/backend_manager.h" +#include "core/providers/openvino/ov_versions/capability.h" +#include "core/providers/openvino/ov_versions/utils.h" #include "openvino/core/version.hpp" #if 
defined(_MSC_VER) @@ -25,22 +27,23 @@ namespace openvino_ep { // Constructor GetCapability::GetCapability(const GraphViewer& graph_viewer_param, - const std::string device_type_param, - const std::string device_precision) - : graph_viewer_(graph_viewer_param), device_type_(device_type_param), device_precision_(device_precision) { + const std::string device_type_param) + : graph_viewer_(graph_viewer_param), device_type_(device_type_param) { if (device_type_.find("NPU") != std::string::npos) { - device_type_ = "CPU_FP32"; + device_type_ = "CPU"; } #if OPENVINO_VERSION_MAJOR == 2023 && OPENVINO_VERSION_MINOR == 1 - data_ops_ = new DataOps(graph_viewer_, V_2023_1, device_type_, device_precision_); + data_ops_ = new DataOps(graph_viewer_, V_2023_1, device_type_); #elif OPENVINO_VERSION_MAJOR == 2023 && OPENVINO_VERSION_MINOR == 2 - data_ops_ = new DataOps(graph_viewer_, V_2023_2, device_type_, device_precision_); + data_ops_ = new DataOps(graph_viewer_, V_2023_2, device_type_); #elif OPENVINO_VERSION_MAJOR == 2023 && OPENVINO_VERSION_MINOR == 3 - data_ops_ = new DataOps(graph_viewer_, V_2023_3, device_type_, device_precision_); + data_ops_ = new DataOps(graph_viewer_, V_2023_3, device_type_); #elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 0 - data_ops_ = new DataOps(graph_viewer_, V_2024_0, device_type_, device_precision_); + data_ops_ = new DataOps(graph_viewer_, V_2024_0, device_type_); +#elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 1 + data_ops_ = new DataOps(graph_viewer_, V_2024_1, device_type_); #else - data_ops_ = new DataOps(graph_viewer_, V_2024_0, device_type_, device_precision_); + data_ops_ = new DataOps(graph_viewer_, V_2024_1, device_type_); #endif } diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability.h b/onnxruntime/core/providers/openvino/ov_versions/capability.h index d9fe5a95ef833..a908bf26247fb 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/capability.h +++ b/onnxruntime/core/providers/openvino/ov_versions/capability.h @@ -5,7 +5,7 @@ #include #include #include -#include "data_ops.h" +#include "core/providers/openvino/ov_versions/data_ops.h" namespace onnxruntime { namespace openvino_ep { @@ -14,14 +14,12 @@ class GetCapability { private: const GraphViewer& graph_viewer_; std::string device_type_; - std::string device_precision_; DataOps* data_ops_; bool is_wholly_supported_graph_ = false; public: GetCapability(const GraphViewer& graph_viewer_param, - const std::string device_type_param, - const std::string precision); + const std::string device_type_param); virtual std::vector> Execute(); bool IsWhollySupportedGraph() { return is_wholly_supported_graph_; diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc index c7c3e93595719..5d7956f6fb559 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc @@ -9,12 +9,12 @@ #include #include "core/providers/shared_library/provider_api.h" -#include "../backend_utils.h" -#include "../backend_manager.h" -#include "data_ops.h" -#include "capability.h" -#include "utils.h" -#include "../ov_interface.h" +#include "core/providers/openvino/backend_utils.h" +#include "core/providers/openvino/backend_manager.h" +#include "core/providers/openvino/ov_interface.h" +#include "core/providers/openvino/ov_versions/data_ops.h" +#include "core/providers/openvino/ov_versions/capability.h" +#include 
"core/providers/openvino/ov_versions/utils.h" #if defined(_MSC_VER) #pragma warning(disable : 4244 4245 5208) @@ -122,6 +122,7 @@ std::vector supported_op_mode = { {"Dropout", V_2020_4, {"CPU", "GPU"}}, {"Elu", V_2020_4, {"CPU", "GPU"}}, {"Einsum", V_2023_1, {"CPU", "GPU"}}, + {"EPContext", V_2024_0, {"CPU", "GPU", "NPU"}}, {"Equal", V_2020_4, {"CPU", "GPU"}}, {"Erf", V_2020_4, {"CPU", "GPU"}}, {"Exp", V_2020_4, {"CPU", "GPU"}}, @@ -360,238 +361,22 @@ void DataOps::populate_op_mode_supported() { // populate unsupportedmode_t { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, + UnsupportedOpMode obj = {{V_2024_1}, [this](const Node* node, const InitializedTensorSet&) { - // Abs is not supproted with INT8 or INT32 as input data type on GPU - if ((device_id_.find("GPU") != std::string::npos)) { - for (size_t i = 0; i < node->InputDefs().size(); i++) { - if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == - ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8 || - node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == - ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32) - return true; - } - } - return false; - }}; - op_list_.insert({"Abs", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet&) { - // tensor type does not support select last index - auto& attributes = node->GetAttributes(); - auto last_index_arg = - attributes.count("select_last_index") > 0 ? attributes.at("select_last_index").i() - : 0; - if (last_index_arg != 0) - return true; - // tensor type supports float as input for argmax and argmin - if (node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type() != - ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT) - return true; - return false; - }}; - op_list_.insert({"ArgMax", obj}); - op_list_.insert({"ArgMin", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet&) { - if (device_id_.find("GPU") != std::string::npos) { - // int64 data type is not supported on GPU - const bool data_is_int64 = - node->InputDefs()[0]->Type()->find("int64") != std::string::npos; - return data_is_int64; - } - return false; - }}; - op_list_.insert({"Clip", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet&) { - if (device_id_.find("GPU") != std::string::npos) { - bool if_bias = false; - const auto& attributes = node->GetAttributes(); - auto conv_filter = attributes.find("kernel_shape"); - if (conv_filter != attributes.end()) { - auto& ints = conv_filter->second().ints(); - // check if the Input for the op has bias - if (node->InputDefs().size() > 2) { - if (node->InputDefs()[2]->Name() == "B") - if_bias = true; - } - // If the kernel size is 3D and the input doesnot have bias, - // the op is rejected in case of GPU - if (ints.size() == 3 && !if_bias) - return true; - } - } - return false; - }}; - op_list_.insert({"Conv", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet&) { - if (device_id_.find("GPU") != std::string::npos) { - // If the device is GPU, only 2D dilations with 1x1 pixel are supported - const auto& attributes = node->GetAttributes(); - auto dilation = attributes.find("dilations"); - if (dilation != attributes.end()) { - auto& dilation_attr = attributes.at("dilations"); - auto int_size = 
dilation_attr.ints_size(); - if (int_size == 2) { - if (dilation_attr.ints(0) != 1 || dilation_attr.ints(1) != 1) { - return true; - } - } - // If 3D dilations, reject the op - if (int_size == 3) - return true; - } - auto group_attr = attributes.find("group"); - // group 4 is not supported - if (group_attr->second().i() == 4) - return true; - } - return false; - }}; - op_list_.insert({"ConvTranspose", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_3}, - [this](const Node* node, const InitializedTensorSet&) { - if (device_id_.find("GPU") != std::string::npos && node->OpType() == "If") { - // Only Equal op is supported as input for IF op in GPU - for (auto nit = node->InputNodesBegin(); nit != node->InputNodesEnd(); ++nit) { - if (nit->OpType() == "Equal") { - return false; - } - } - } - return true; - }}; - op_list_.insert({"If", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet&) { - const auto& attributes = node->GetAttributes(); - // dilations attrs are not supported yet for Maxpool - if (attributes.find("dilations") != attributes.end()) - return true; - return (!this->dimension_unsupported(node)); - }}; - op_list_.insert({"MaxPool", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet&) { - if (device_id_.find("GPU") != std::string::npos) { - auto x_data_type = node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type(); - auto y_data_type = node->InputDefs()[1]->TypeAsProto()->tensor_type().elem_type(); - // currently both inputs with int32 are not supported - // and also both input datatypes should be same - const bool A_is_int32 = - node->InputDefs()[0]->Type()->find("int32") != std::string::npos; - const bool B_is_int32 = - node->InputDefs()[1]->Type()->find("int32") != std::string::npos; - if ((A_is_int32 && B_is_int32) || (x_data_type != y_data_type)) + // If the Input of ReduceMax op is UINT8, it is rejected (Due to output mismatch) + for (size_t i = 0; i < node->InputDefs().size(); i++) { + if ((node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == + ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8) || + (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == + ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8)) return true; } return false; }}; - op_list_.insert({"Mod", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet&) { - if (device_id_.find("GPU") != std::string::npos) { - auto x_data_type = node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type(); - auto y_data_type = node->InputDefs()[1]->TypeAsProto()->tensor_type().elem_type(); - return x_data_type != y_data_type; - } - // currently both inputs with int32 or int64 datatype are not supported - const bool A_is_int32 = node->InputDefs()[0]->Type()->find("int32") != std::string::npos; - const bool B_is_int32 = node->InputDefs()[1]->Type()->find("int32") != std::string::npos; - const bool A_is_int64 = node->InputDefs()[0]->Type()->find("int64") != std::string::npos; - const bool B_is_int64 = node->InputDefs()[1]->Type()->find("int64") != std::string::npos; - if ((A_is_int32 && B_is_int32) || (A_is_int64 && B_is_int64)) - return true; - return false; - }}; - op_list_.insert({"Pow", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet&) { - // Max op with one 
input is not supporting for GPU_FP16 - if (device_id_.find("GPU") != std::string::npos) { - if (device_precision_ == "FP16") { - if (node->InputDefs().size() == 1) { - return true; - } - } - } - return false; - }}; - op_list_.insert({"Max", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet&) { - // Min op with one input is not supporting for GPU_FP16 - if (device_id_.find("GPU") != std::string::npos) { - if (device_precision_ == "FP16") { - if (node->InputDefs().size() == 1) { - return true; - } - } - } - return false; - }}; - op_list_.insert({"Min", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet&) { - // Sum op with one input is not supporting for GPU_FP16 - if (device_id_.find("GPU") != std::string::npos) { - if (device_precision_ == "FP16") { - if (node->InputDefs().size() == 1) { - return true; - } - } - } - return false; - }}; - op_list_.insert({"Sum", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet& initializers) { - if (device_id_.find("GPU") != std::string::npos) { - auto slope = node->InputDefs()[1]; - // PRelu slope has to be an initializer or needs to come from a constant node - if (initializers.count(slope->Name())) { - return false; - } else { - for (auto input_node = node->InputNodesBegin(); - input_node != node->InputNodesEnd(); ++input_node) { - if (GetInputCount( - this->graph_viewer_.GetNode((*input_node).Index()), initializers) == 0) - return false; - } - } - } - return true; - }}; - op_list_.insert({"PRelu", obj}); + op_list_.insert({"ReduceMax", obj}); } { - UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0}, + UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1}, [this](const Node* node, const InitializedTensorSet&) { const auto& input_arg = node->InputDefs()[1]; auto shape = input_arg->Shape(); @@ -608,105 +393,7 @@ void DataOps::populate_op_mode_supported() { op_list_.insert({"Reshape", obj}); } { - UnsupportedOpMode obj = {{V_2022_1}, - [this](const Node* node, const InitializedTensorSet&) { - auto& attributes = node->GetAttributes(); - if (attributes.count("mode") == 1 && attributes.at("mode").s() == "linear") { - if (node->InputDefs().size() == 4) { - return true; - } - } - return false; - }}; - op_list_.insert({"Resize", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet&) { - if (device_id_.find("GPU") != std::string::npos) { - // INT32 dataype is not supported as input - for (size_t i = 0; i < node->InputDefs().size(); i++) { - if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == - ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32) - return true; - } - } - return false; - }}; - op_list_.insert({"ReduceLogSumExp", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet&) { - if (device_id_.find("GPU") != std::string::npos) { - auto output_data_type = - node->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type(); - // If the output of ScatterND op is BOOL, it is rejected for GPU. 
- if (output_data_type == - ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL) - return true; - } - return false; - }}; - op_list_.insert({"ScatterND", obj}); - op_list_.insert({"ScatterElements", obj}); - op_list_.insert({"Scatter", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet&) { - // If the Input of Shrink op is UINT8, it is rejected (Due to output mismatch) - for (size_t i = 0; i < node->InputDefs().size(); i++) { - if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == - ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8) - return true; - } - return false; - }}; - op_list_.insert({"Shrink", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, - [this](const Node* node, const InitializedTensorSet& initializers) { - // start, end, axes need to be a initializer - bool cond_for_slice = false; - const auto& data_arg = node->InputDefs()[0]; - auto graph_inputs = graph_viewer_.GetInputs(); - - auto it = find(graph_inputs.begin(), graph_inputs.end(), data_arg); - if (it != graph_inputs.end()) { - if (node->InputDefs().size() > 1) { - const auto& start_arg = node->InputDefs()[1]; - const auto& end_arg = node->InputDefs()[2]; - cond_for_slice |= initializers.find(start_arg->Name()) == initializers.end(); - cond_for_slice |= initializers.find(end_arg->Name()) == initializers.end(); - } - if (node->InputDefs().size() > 3) { - const auto& axes_arg = node->InputDefs()[3]; - cond_for_slice |= initializers.find(axes_arg->Name()) == initializers.end(); - } - } - - return cond_for_slice; - }}; - op_list_.insert({"Slice", obj}); - } - { - UnsupportedOpMode obj = {{V_2022_1, V_2022_2}, - [this](const Node* node, const InitializedTensorSet&) { - if (device_id_.find("GPU") != std::string::npos) { - if (node->InputDefs().size() > 1 && - (node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type() == - ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT)) { - return true; - } - } - return false; - }}; - op_list_.insert({"Squeeze", obj}); - } - { - UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0}, + UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1}, [this](const Node* node, const InitializedTensorSet&) { // If the operator is unsqueeze // If axes is an input, then we cannot produce a static graph. 
@@ -721,7 +408,7 @@ void DataOps::populate_op_mode_supported() { op_list_.insert({"Unsqueeze", obj}); } { - UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0}, + UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1}, [this](const Node* node, const InitializedTensorSet&) { // check for attributes auto& upsample_attr = node->GetAttributes(); diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h index 0990904908111..89b738de1d980 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h @@ -27,7 +27,8 @@ enum versionNum { V_2023_1, V_2023_2, V_2023_3, - V_2024_0 + V_2024_0, + V_2024_1 }; using VersionNum = enum versionNum; @@ -62,8 +63,8 @@ class DataOps { std::set supported_types_initializer_; protected: - virtual void populate_op_mode_supported(); - virtual void populate_types_supported(); + void populate_op_mode_supported(); + void populate_types_supported(); bool op_is_supported(std::string name, std::vector& list); bool dimension_unsupported(const Node* node); bool unsupported_op_mode(const Node* node); @@ -71,8 +72,9 @@ class DataOps { bool node_is_supported(const NodeIndex node_idx); public: - DataOps(const GraphViewer& graph_viewer_param, VersionNum ver, const std::string dev_id, const std::string device_precision) - : graph_viewer_(graph_viewer_param), version_id_(ver), device_id_(dev_id), device_precision_(device_precision) { + DataOps(const GraphViewer& graph_viewer_param, VersionNum ver, + const std::string dev_id) + : graph_viewer_(graph_viewer_param), version_id_(ver), device_id_(dev_id) { populate_op_mode_supported(); populate_types_supported(); } diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index fda41161ac40a..507c094422509 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1669,9 +1669,6 @@ ProviderOptions OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(const O ov_options_converted_map["enable_npu_fast_compile"] = "true"; } - if (legacy_ov_options->device_id != nullptr) - ov_options_converted_map["device_id"] = legacy_ov_options->device_id; - if (legacy_ov_options->num_of_threads != '\0') ov_options_converted_map["num_of_threads"] = std::to_string(legacy_ov_options->num_of_threads); @@ -1694,6 +1691,8 @@ ProviderOptions OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(const O // Add new provider option below ov_options_converted_map["num_streams"] = "1"; + ov_options_converted_map["export_ep_ctx_blob"] = "false"; + ov_options_converted_map["model_priority"] = "DEFAULT"; return ov_options_converted_map; } @@ -1703,7 +1702,6 @@ std::shared_ptr OpenVINOProviderFactoryCreator::Creat } std::shared_ptr OpenVINOProviderFactoryCreator::Create(const ProviderOptions* provider_options_map) { - // std::cout << provider_options_map.at("num_streams") << std::endl; return s_library_openvino.Get().CreateExecutionProviderFactory(provider_options_map); } diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index f8668be50a962..236d2cfeb2b33 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -928,6 +928,9 @@ std::unique_ptr CreateExecutionProviderInstance( if (option.first == "device_type") { OV_provider_options_map[option.first] = 
option.second; continue; + } else if (option.first == "precision") { + OV_provider_options_map[option.first] = option.second; + continue; } else if (option.first == "enable_npu_fast_compile") { if (!(option.second == "True" || option.second == "true" || option.second == "False" || option.second == "false")) { @@ -960,10 +963,10 @@ std::unique_ptr CreateExecutionProviderInstance( value = "true"; } OV_provider_options_map["disable_dynamic_shapes"] = value; - } else if (option.first == "device_id") { + } else if (option.first == "num_of_threads") { OV_provider_options_map[option.first] = option.second; continue; - } else if (option.first == "num_of_threads") { + } else if (option.first == "model_priority") { OV_provider_options_map[option.first] = option.second; continue; } else if (option.first == "num_streams") { @@ -975,6 +978,9 @@ std::unique_ptr CreateExecutionProviderInstance( } else if (option.first == "context") { OV_provider_options_map[option.first] = option.second; continue; + } else if (option.first == "export_ep_ctx_blob") { + OV_provider_options_map[option.first] = option.second; + continue; } else { ORT_THROW("Invalid OpenVINO EP option: ", option.first); } diff --git a/onnxruntime/python/onnxruntime_pybind_state_common.h b/onnxruntime/python/onnxruntime_pybind_state_common.h index 22314610dbee9..dc9394a83a4ea 100644 --- a/onnxruntime/python/onnxruntime_pybind_state_common.h +++ b/onnxruntime/python/onnxruntime_pybind_state_common.h @@ -48,17 +48,11 @@ struct OrtStatus { #endif #ifdef USE_OPENVINO -#if OPENVINO_CONFIG_CPU_FP32 -#define BACKEND_OPENVINO "-OPENVINO_CPU_FP32" +#if OPENVINO_CONFIG_CPU +#define BACKEND_OPENVINO "-OPENVINO_CPU" -#elif OPENVINO_CONFIG_CPU_FP16 -#define BACKEND_OPENVINO "-OPENVINO_CPU_FP16" - -#elif OPENVINO_CONFIG_GPU_FP32 -#define BACKEND_OPENVINO "-OPENVINO_GPU_FP32" - -#elif OPENVINO_CONFIG_GPU_FP16 -#define BACKEND_OPENVINO "-OPENVINO_GPU_FP16" +#elif OPENVINO_CONFIG_GPU +#define BACKEND_OPENVINO "-OPENVINO_GPU" #elif OPENVINO_CONFIG_NPU #define BACKEND_OPENVINO "-OPENVINO_NPU" diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index b05f58a4e75b2..62291762f61b8 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -78,7 +78,7 @@ namespace perftest { "\t [OpenVINO only] [num_of_threads]: Overrides the accelerator hardware type and precision with these values at runtime.\n" "\t [OpenVINO only] [cache_dir]: Explicitly specify the path to dump and load the blobs(Model caching) or cl_cache (Kernel Caching) files feature. If blob files are already present, it will be directly loaded.\n" "\t [OpenVINO only] [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU device(Reduces the CPU Utilization while using GPU) \n" - "\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU_FP32 enable_npu_fast_compile|true num_of_threads|5 enable_opencl_throttling|true cache_dir|\"\"\"\n" + "\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU enable_npu_fast_compile|true num_of_threads|5 enable_opencl_throttling|true cache_dir|\"\"\"\n" "\n" "\t [QNN only] [backend_path]: QNN backend path. 
e.g '/folderpath/libQnnHtp.so', '/folderpath/libQnnCpu.so'.\n" "\t [QNN only] [profiling_level]: QNN profiling level, options: 'basic', 'detailed', default 'off'.\n" diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index f93efba81106b..4067f50ebc1df 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -245,11 +245,15 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device auto value = token.substr(pos + 1); if (key == "device_type") { - std::set ov_supported_device_types = {"CPU_FP32", "CPU_FP16", "GPU_FP32", - "GPU.0_FP32", "GPU.1_FP32", "GPU_FP16", - "GPU.0_FP16", "GPU.1_FP16", "NPU"}; + std::set ov_supported_device_types = {"CPU", "GPU", + "GPU.0", "GPU.1", "NPU"}; + std::set deprecated_device_types = {"CPU_FP32", "GPU_FP32", + "GPU.0_FP32", "GPU.1_FP32", "GPU_FP16", + "GPU.0_FP16", "GPU.1_FP16"}; if (ov_supported_device_types.find(value) != ov_supported_device_types.end()) { ov_options[key] = value; + } else if (deprecated_device_types.find(value) != deprecated_device_types.end()) { + ov_options[key] = value; } else if (value.find("HETERO:") == 0) { ov_options[key] = value; } else if (value.find("MULTI:") == 0) { @@ -258,13 +262,43 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device ov_options[key] = value; } else { ORT_THROW( - "[ERROR] [OpenVINO] You have selected a wrong configuration value for the key 'device_type'. " - "Select from 'CPU_FP32', 'CPU_FP16', 'GPU_FP32', 'GPU.0_FP32', 'GPU.1_FP32', 'GPU_FP16', " - "'GPU.0_FP16', 'GPU.1_FP16', 'NPU' or from" + "[ERROR] [OpenVINO] You have selected a wrong configuration value for the key 'device_type'. " + "Select from 'CPU', 'GPU', 'GPU.0', 'GPU.1', 'NPU' or from" " HETERO/MULTI/AUTO options available. \n"); } } else if (key == "device_id") { - ov_options[key] = value; + if (value == "CPU" || value == "GPU" || value == "NPU") { + ov_options[key] = value; + } else { + ORT_THROW("[ERROR] [OpenVINO] Unsupported device_id is selected. Select from available options."); + } } else if (key == "precision") { + auto device_type = ov_options["device_type"]; + if (device_type == "CPU") { + if (value == "" || value == "ACCURACY" || value == "FP32") { + ov_options[key] = "FP32"; + continue; + } else { + ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. CPU only supports FP32. \n"); + } + } else if (device_type == "NPU") { + if (value == "" || value == "ACCURACY" || value == "FP16") { + ov_options[key] = "FP16"; + continue; + } else { + ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. NPU only supports FP16. \n"); + } + } else if (device_type == "GPU") { + if (value == "") { + ov_options[key] = "FP16"; + continue; + } else if (value == "ACCURACY" || value == "FP16" || value == "FP32") { + ov_options[key] = value; + continue; + } else { + ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. GPU only supports FP32 / FP16. 
\n"); + } + } } else if (key == "enable_npu_fast_compile") { if (value == "true" || value == "True" || value == "false" || value == "False") { @@ -294,6 +328,8 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device } else { ov_options[key] = value; } + } else if (key == "model_priority") { + ov_options[key] = value; } else if (key == "cache_dir") { ov_options[key] = value; } else if (key == "context") { @@ -304,6 +340,15 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device } else { ov_options[key] = value; } + } else if (key == "export_ep_ctx_blob") { + if (value == "true" || value == "True" || + value == "false" || value == "False") { + ov_options[key] = value; + } else { + ORT_THROW( + "[ERROR] [OpenVINO] The value for the key 'export_ep_ctx_blob' " + "should be a boolean i.e. true or false. Default value is false.\n"); + } } else { ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_npu_fast_compile', 'num_of_threads', 'cache_dir', 'num_streams', 'enable_opencl_throttling', 'disable_dynamic_shapes'] \n"); } diff --git a/onnxruntime/test/providers/cpu/activation/activation_op_test.h b/onnxruntime/test/providers/cpu/activation/activation_op_test.h index 9a74d763a13e3..409409f56c51c 100644 --- a/onnxruntime/test/providers/cpu/activation/activation_op_test.h +++ b/onnxruntime/test/providers/cpu/activation/activation_op_test.h @@ -42,7 +42,7 @@ inline void TestActivationOp(const char* szOp, const std::vector> } // Disabled because of accuracy issues for GPU -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) int leaky = strcmp(szOp, "LeakyRelu"); if (leaky == 0) { excluded_providers.insert(kOpenVINOExecutionProvider); diff --git a/onnxruntime/test/providers/cpu/math/clip_test.cc b/onnxruntime/test/providers/cpu/math/clip_test.cc index b5d5f84df950a..6f81bbbe31d54 100644 --- a/onnxruntime/test/providers/cpu/math/clip_test.cc +++ b/onnxruntime/test/providers/cpu/math/clip_test.cc @@ -23,7 +23,7 @@ TEST(MathOpTest, Clip_6) { {10.0f, 4.4f, 10.0f, -1.3f, 3.5f, 10.0f, -5.4f, 9.3f, 10.0f}); -#if defined(OPENVINO_CONFIG_CPU_FP32) || defined(OPENVINO_CONFIG_CPU_FP16) +#if defined(OPENVINO_CONFIG_CPU) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); #else test.Run(); diff --git a/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc b/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc index c02486a2ec26f..0d12cb94799c4 100644 --- a/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc +++ b/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc @@ -156,7 +156,7 @@ TEST(MathOpTest, Add_float) { test.AddInput("B", dims, rhs_values); test.AddOutput("C", dims, out_values); -#if defined(OPENVINO_CONFIG_GPU_GP16) +#if defined(OPENVINO_CONFIG_GPU) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); // OpenVINO: Disabled due to accuracy mismatch for FP16 #else @@ -219,7 +219,7 @@ TEST(MathOpTest, Add_Broadcast_MultidirectionalAB) { test.AddInput("A", {3, 1}, lhs_values); test.AddInput("B", {3}, rhs_values); test.AddOutput("C", {3, 3}, out_values); -#if defined(OPENVINO_CONFIG_GPU_FP32) || defined(OPENVINO_CONFIG_GPU_FP16) +#if defined(OPENVINO_CONFIG_GPU) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); // OpenVINO: 
disabled temporarily due to accurarcy issues @@ -245,7 +245,7 @@ TEST(MathOpTest, Add_Broadcast_MultidirectionalBA) { test.AddInput("A", {3}, lhs_values); test.AddInput("B", {3, 1}, rhs_values); test.AddOutput("C", {3, 3}, out_values); -#if defined(OPENVINO_CONFIG_GPU_FP32) || defined(OPENVINO_CONFIG_GPU_FP16) +#if defined(OPENVINO_CONFIG_GPU) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); // OpenVINO: disabled temporarily due to accurarcy issues @@ -423,8 +423,8 @@ TEST(MathOpTest, Add_Broadcast_2x1x1_3x4) { std::unordered_set excluded_providers; excluded_providers.insert(kTensorrtExecutionProvider); -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) - // OpenVINO GPU: Disabled temporarily due to accuarcy issues +#if defined(OPENVINO_CONFIG_GPU) + // OpenVINO GPU: Disabled temporarily due to accuracy issues // OpenVINO VPU: Disabled due to software limitation excluded_providers.insert(kOpenVINOExecutionProvider); #endif @@ -726,7 +726,7 @@ TEST(MathOpTest, Ceil) { test.AddOutput("Y", dims, {-1.0f, 1.0f, 0.0f, 11.0f}); -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) // OpenVINO: Disabled due to software limitation for GPU and VPU Plugins. // This test runs fine on CPU Plugin test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); @@ -744,7 +744,7 @@ TEST(MathOpTest, Ceil_double) { test.AddOutput("Y", dims, {-1.0, 1.0, 0.0, 11.0}); -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) // OpenVINO: Disabled due to software limitation for GPU and VPU Plugins. // This test runs fine on CPU Plugin test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); @@ -1195,7 +1195,7 @@ TEST(MathOpTest, Sum_6) { -6.0f, 6.6f, 28.0f, -1.0f, 0.06f, 0.25f}); -#if defined(OPENVINO_CONFIG_GPU_FP16) +#if defined(OPENVINO_CONFIG_GPU) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); // OpenVINO EP: Disabled due to accuracy mismatch for FP16 #else test.Run(); @@ -1222,7 +1222,7 @@ TEST(MathOpTest, Sum_6_double) { -6.0, 6.6, 28.0, -1.0, 0.06, 0.25}); -#if defined(OPENVINO_CONFIG_GPU_FP16) +#if defined(OPENVINO_CONFIG_GPU) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); // OpenVINO EP: Disabled due to accuracy mismatch for FP16 #else test.Run(); @@ -1246,7 +1246,7 @@ TEST(MathOpTest, Sum_8_Test1) { 311.0f, 312.0f, 313.0f, 321.0f, 322.0f, 323.0f, 331.0f, 332.0f, 333.0f}); -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) // OpenVINO: Disabled due to software limitation for GPU and VPU Plugins. // This test runs fine on CPU Plugin test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); @@ -1272,7 +1272,7 @@ TEST(MathOpTest, Sum_8_Test1_double) { 311.0, 312.0, 313.0, 321.0, 322.0, 323.0, 331.0, 332.0, 333.0}); -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) // OpenVINO: Disabled due to software limitation for GPU and VPU Plugins. 
// This test runs fine on CPU Plugin test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); @@ -1306,7 +1306,7 @@ TEST(MathOpTest, Sum_8_Test2) { 3.3f, 4.4f, -94.7f, 59.6f, 64.01f, -8.0f}); -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) // OpenVINO: Disabled temporarily due to accuracy issues test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); // TensorRT: Input batch size is inconsistent #else @@ -1340,7 +1340,7 @@ TEST(MathOpTest, Sum_8_Test2_double) { 3.3, 4.4, -94.7, 59.6, 64.01, -8.0}); -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) // OpenVINO: Disabled temporarily due to accuracy issues test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); // TensorRT: Input batch size is inconsistent #else diff --git a/onnxruntime/test/providers/cpu/math/gemm_test.cc b/onnxruntime/test/providers/cpu/math/gemm_test.cc index 1a542fb67418e..7ec84d87b2a8b 100644 --- a/onnxruntime/test/providers/cpu/math/gemm_test.cc +++ b/onnxruntime/test/providers/cpu/math/gemm_test.cc @@ -366,7 +366,7 @@ TYPED_TEST(GemmOpTypedTests, TestGemmBroadcast) { static_cast(-9.0f), static_cast(-8.0f), static_cast(-7.0f)}); std::unordered_set excluded_providers; -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) excluded_providers.insert(kOpenVINOExecutionProvider); // OpenVINO: Temporarily disabled due to accuracy issues #endif @@ -405,7 +405,7 @@ TYPED_TEST(GemmOpTypedTests, TestGemmTrans) { test.AddOutput("Y", {2, 3}, {static_cast(11.0f), static_cast(11.0f), static_cast(11.0f), static_cast(-9.0f), static_cast(-9.0f), static_cast(-9.0f)}); -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) test.ConfigExcludeEps({kOpenVINOExecutionProvider}); // OpenVINO: Temporarily disabled due to accuracy issues #endif test.Config(run_with_tunable_op) @@ -431,7 +431,7 @@ TYPED_TEST(GemmOpTypedTests, TestGemmTransB) { test.AddOutput("Y", {2, 3}, {static_cast(11.0f), static_cast(11.0f), static_cast(11.0f), static_cast(-9.0f), static_cast(-9.0f), static_cast(-9.0f)}); -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) test.ConfigExcludeEps({kOpenVINOExecutionProvider}); // OpenVINO: Temporarily disabled due to accuracy issues #endif test.Config(run_with_tunable_op) @@ -461,7 +461,7 @@ TYPED_TEST(GemmOpTypedTests, TestGemmTransB_1) { test.AddOutput("Y", {2, 3}, {static_cast(11.0f), static_cast(11.0f), static_cast(11.0f), static_cast(-9.0f), static_cast(-9.0f), static_cast(-9.0f)}); -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) test.ConfigExcludeEps({kOpenVINOExecutionProvider}); // OpenVINO: Temporarily disabled due to accuracy issues #endif test.Config(run_with_tunable_op) @@ -491,7 +491,7 @@ TYPED_TEST(GemmOpTypedTests, TestGemmAlpha) { // test.AddOutput("Y", {2, 3}, // {5.0f, 5.0f, 5.0f, // -5.0f, -5.0f, -5.0f}); -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) test.ConfigExcludeEps({kOpenVINOExecutionProvider}); // OpenVINO: Temporarily disabled due to accuracy issues #else test.ConfigExcludeEps({kTensorrtExecutionProvider}); // TensorRT: Seg fault 
in parser @@ -516,7 +516,7 @@ TYPED_TEST(GemmOpTypedTests, TestGemmBeta) { test.AddOutput("Y", {2, 3}, {static_cast(12.0f), static_cast(12.0f), static_cast(12.0f), static_cast(-8.0f), static_cast(-8.0f), static_cast(-8.0f)}); -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) test.ConfigExcludeEps({kOpenVINOExecutionProvider}); // OpenVINO: Temporarily disabled due to accuracy issues #else test.ConfigExcludeEps({kTensorrtExecutionProvider}); // TensorRT: Seg fault in parser @@ -564,7 +564,7 @@ TYPED_TEST(GemmOpTypedTests, TestGemmAlphaBeta) { test.AddOutput("Y", {2, 3}, {static_cast(7.0f), static_cast(7.0f), static_cast(7.0f), static_cast(-3.0f), static_cast(-3.0f), static_cast(-3.0f)}); -#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) test.ConfigExcludeEps({kOpenVINOExecutionProvider}); // OpenVINO: Temporarily disabled due to accuracy issues #else test.ConfigExcludeEps({kTensorrtExecutionProvider}); // TensorRT: Seg fault in parser diff --git a/onnxruntime/test/providers/cpu/nn/batch_norm_op_test.cc b/onnxruntime/test/providers/cpu/nn/batch_norm_op_test.cc index d91a1de3faa6e..b0d97410ac9b3 100644 --- a/onnxruntime/test/providers/cpu/nn/batch_norm_op_test.cc +++ b/onnxruntime/test/providers/cpu/nn/batch_norm_op_test.cc @@ -50,7 +50,7 @@ void TestBatchNorm(const unordered_map>& input_data_map, } // OpenVINO: Disabled due to software limitations -#if defined(OPENVINO_CONFIG_GPU_FP32) || defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_CPU_FP32) || defined(OPENVINO_CONFIG_CPU_FP16) +#if defined(OPENVINO_CONFIG_GPU) || defined(OPENVINO_CONFIG_CPU) excluded_eps.insert(kOpenVINOExecutionProvider); #endif test.Run(expect_result, err_str, excluded_eps); diff --git a/onnxruntime/test/providers/cpu/nn/pool_op_test.cc b/onnxruntime/test/providers/cpu/nn/pool_op_test.cc index c8cf183291518..885fb11c6e999 100644 --- a/onnxruntime/test/providers/cpu/nn/pool_op_test.cc +++ b/onnxruntime/test/providers/cpu/nn/pool_op_test.cc @@ -346,7 +346,7 @@ TEST(PoolTest, MaxPool2D_uint8) { test.AddInput("Input", {1, 1, 5, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25}); test.AddOutput("Output", output_shape, output); -#if defined(OPENVINO_CONFIG_GPU_FP32) || defined(OPENVINO_CONFIG_GPU_FP16) +#if defined(OPENVINO_CONFIG_GPU) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); #else test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}); diff --git a/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc b/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc index 2902995df1e71..98a65b8efffd2 100644 --- a/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc +++ b/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc @@ -43,7 +43,7 @@ void TestReduceOp(const std::string& op, test.AddAttribute("keepdims", keepdims); test.AddInput("data", input_dims, data); test.AddOutput("reduced", expected_dims, expected_data); -#if defined(OPENVINO_CONFIG_GPU_FP32) +#if defined(OPENVINO_CONFIG_GPU) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kOpenVINOExecutionProvider, kTensorrtExecutionProvider}); // TensorRT,OpenVINO: result differs #else test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kTensorrtExecutionProvider}); // TensorRT: result differs @@ -1356,7 +1356,7 @@ TEST(ReductionOpTest, ReduceMax_int32) { 11, 12}); 
test.AddOutput("reduced", {3, 1, 1}, {4, 8, 12}); -#if defined(OPENVINO_CONFIG_GPU_FP32) || defined(OPENVINO_CONFIG_GPU_FP16) +#if defined(OPENVINO_CONFIG_GPU) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); // OpenVINO: Disabled temporarily #else test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT: axis must be 0 @@ -1377,7 +1377,7 @@ TEST(ReductionOpTest, ReduceMax_int64) { 9, 10, 11, 12}); test.AddOutput("reduced", {3, 1, 1}, {4, 8, 12}); -#if defined(OPENVINO_CONFIG_GPU_FP32) || defined(OPENVINO_CONFIG_GPU_FP16) +#if defined(OPENVINO_CONFIG_GPU) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); // OpenVINO: Disabled temporarily #else test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT: axis must be 0 diff --git a/onnxruntime/test/providers/cpu/tensor/scatter_op_test.cc b/onnxruntime/test/providers/cpu/tensor/scatter_op_test.cc index b1dfec7951338..6b587be7d74eb 100644 --- a/onnxruntime/test/providers/cpu/tensor/scatter_op_test.cc +++ b/onnxruntime/test/providers/cpu/tensor/scatter_op_test.cc @@ -289,7 +289,7 @@ static void scatter_bool_with_axis_tests(const char* op_name, int op_version) { test.AddInput("indices", {1, 2}, {1, 3}); test.AddInput("updates", {1, 2}, {true, false}); test.AddOutput("y", {1, 5}, {false, true, false, false, false}); -#if defined(OPENVINO_CONFIG_GPU_FP32) || defined(OPENVINO_CONFIG_GPU_FP16) +#if defined(OPENVINO_CONFIG_GPU) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaNHWCExecutionProvider, kOpenVINOExecutionProvider}); // OpenVINO: Disabled due to failure for GPU #else diff --git a/onnxruntime/test/providers/cpu/tensor/where_op_test.cc b/onnxruntime/test/providers/cpu/tensor/where_op_test.cc index 7308041194bf5..6237521b34dfd 100644 --- a/onnxruntime/test/providers/cpu/tensor/where_op_test.cc +++ b/onnxruntime/test/providers/cpu/tensor/where_op_test.cc @@ -62,7 +62,7 @@ void WhereBroadcastTest(const T& x_value, const T& y_value) { } test.AddOutput("output", {3, 3, 3}, result); -#if defined(OPENVINO_CONFIG_GPU_FP32) || defined(OPENVINO_CONFIG_GPU_FP16) +#if defined(OPENVINO_CONFIG_GPU) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); // OpenVINO: Disabled due to failure for GPU #else @@ -86,7 +86,7 @@ void WhereBroadcastTest(const T& x_value, const T& y_value) { } test.AddOutput("output", {3, 3, 3}, result); -#if defined(OPENVINO_CONFIG_GPU_FP32) || defined(OPENVINO_CONFIG_GPU_FP16) +#if defined(OPENVINO_CONFIG_GPU) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); // OpenVINO: Disabled due to failure for GPU #else diff --git a/onnxruntime/test/python/onnx_backend_test_series.py b/onnxruntime/test/python/onnx_backend_test_series.py index b8897c98c2a0a..395315b2a2b0c 100644 --- a/onnxruntime/test/python/onnx_backend_test_series.py +++ b/onnxruntime/test/python/onnx_backend_test_series.py @@ -134,13 +134,11 @@ def create_backend_test(test_name=None): if backend.supports_device("NNAPI"): current_failing_tests += apply_filters(filters, "current_failing_tests_NNAPI") - if backend.supports_device("OPENVINO_GPU_FP32") or backend.supports_device("OPENVINO_GPU_FP16"): + if backend.supports_device("OPENVINO_GPU"): current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_GPU") - if backend.supports_device("OPENVINO_CPU_FP32"): + if 
backend.supports_device("OPENVINO_CPU"): current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_CPU_FP32") - - if backend.supports_device("OPENVINO_CPU_FP16"): current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_CPU_FP16") if backend.supports_device("OPENVINO_NPU"): diff --git a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc index 0d141d634e051..005128bc05d4a 100644 --- a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc +++ b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc @@ -553,7 +553,7 @@ "test_reduce_max_bool_inputs_cpu", "test_gelu_default_1_cpu", // Disabled due to accuracy mismatch "test_gelu_default_2_cpu" - + ], "current_failing_tests_OPENVINO_NPU": [ "^test_prelu_broadcast", diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 33dc403777de6..0f34f2c01cc74 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -73,13 +73,11 @@ def _str_to_bool(s): def _openvino_verify_device_type(device_read): - choices = ["CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16", "NPU"] + choices = ["CPU", "GPU", "NPU"] choices1 = [ - "CPU_FP32_NO_PARTITION", - "CPU_FP16_NO_PARTITION", - "GPU_FP32_NO_PARTITION", - "GPU_FP16_NO_PARTITION", + "CPU_NO_PARTITION", + "GPU_NO_PARTITION", "NPU_NO_PARTITION", ] status_hetero = True @@ -534,7 +532,7 @@ def convert_arg_line_to_args(self, arg_line): parser.add_argument( "--use_openvino", nargs="?", - const="CPU_FP32", + const="CPU", type=_openvino_verify_device_type, help="Build with OpenVINO for specific hardware.", ) @@ -1223,19 +1221,11 @@ def generate_build_tree( if args.use_openvino: cmake_args += [ "-Donnxruntime_USE_OPENVINO=ON", - "-Donnxruntime_USE_OPENVINO_GPU_FP32=" + ("ON" if args.use_openvino == "GPU_FP32" else "OFF"), - "-Donnxruntime_USE_OPENVINO_GPU_FP16=" + ("ON" if args.use_openvino == "GPU_FP16" else "OFF"), - "-Donnxruntime_USE_OPENVINO_CPU_FP32=" + ("ON" if args.use_openvino == "CPU_FP32" else "OFF"), - "-Donnxruntime_USE_OPENVINO_CPU_FP16=" + ("ON" if args.use_openvino == "CPU_FP16" else "OFF"), + "-Donnxruntime_USE_OPENVINO_GPU=" + ("ON" if args.use_openvino == "GPU" else "OFF"), + "-Donnxruntime_USE_OPENVINO_CPU=" + ("ON" if args.use_openvino == "CPU" else "OFF"), "-Donnxruntime_USE_OPENVINO_NPU=" + ("ON" if args.use_openvino == "NPU" else "OFF"), - "-Donnxruntime_USE_OPENVINO_GPU_FP32_NP=" - + ("ON" if args.use_openvino == "GPU_FP32_NO_PARTITION" else "OFF"), - "-Donnxruntime_USE_OPENVINO_GPU_FP16_NP=" - + ("ON" if args.use_openvino == "GPU_FP16_NO_PARTITION" else "OFF"), - "-Donnxruntime_USE_OPENVINO_CPU_FP32_NP=" - + ("ON" if args.use_openvino == "CPU_FP32_NO_PARTITION" else "OFF"), - "-Donnxruntime_USE_OPENVINO_CPU_FP16_NP=" - + ("ON" if args.use_openvino == "CPU_FP16_NO_PARTITION" else "OFF"), + "-Donnxruntime_USE_OPENVINO_GPU_NP=" + ("ON" if args.use_openvino == "GPU_NO_PARTITION" else "OFF"), + "-Donnxruntime_USE_OPENVINO_CPU_NP=" + ("ON" if args.use_openvino == "CPU_NO_PARTITION" else "OFF"), "-Donnxruntime_USE_OPENVINO_NPU_NP=" + ("ON" if args.use_openvino == "NPU_NO_PARTITION" else "OFF"), "-Donnxruntime_USE_OPENVINO_HETERO=" + ("ON" if args.use_openvino.startswith("HETERO") else "OFF"), "-Donnxruntime_USE_OPENVINO_DEVICE=" + (args.use_openvino), @@ -2636,7 +2626,7 @@ def main(): raise BuildError("Using --get-api-doc requires a single build config") # Disabling unit tests for GPU on nuget creation - if args.use_openvino and 
args.use_openvino != "CPU_FP32" and args.build_nuget: + if args.use_openvino and args.use_openvino != "CPU" and args.build_nuget: args.test = False # GDK builds don't support testing diff --git a/tools/ci_build/github/azure-pipelines/linux-openvino-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-openvino-ci-pipeline.yml index 03e0274fc198a..45d763384ee2c 100644 --- a/tools/ci_build/github/azure-pipelines/linux-openvino-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-openvino-ci-pipeline.yml @@ -32,5 +32,5 @@ jobs: parameters: AgentPool : 'Linux-CPU-2019' JobName: 'Linux_CI_Dev' - RunDockerBuildArgs: '-o ubuntu20.04 -d openvino -v 2024.0.0 -x "--use_openvino CPU_FP32 --build_wheel"' + RunDockerBuildArgs: '-o ubuntu20.04 -d openvino -v 2024.0.0 -x "--use_openvino CPU --build_wheel"' TimeoutInMinutes: 120 diff --git a/tools/ci_build/github/azure-pipelines/py-package-build-pipeline.yml b/tools/ci_build/github/azure-pipelines/py-package-build-pipeline.yml index 4c80aedeb1f18..afa0ad6f4cbc7 100644 --- a/tools/ci_build/github/azure-pipelines/py-package-build-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/py-package-build-pipeline.yml @@ -27,7 +27,7 @@ parameters: - name: cpu_build_py_parameters displayName: 'Extra parameters to pass to build.py for CPU package.' type: string - default: '--use_openvino CPU_FP32' + default: '--use_openvino CPU' - name: gpu_build_py_parameters displayName: 'Extra parameters to pass to build.py for GPU package.' diff --git a/tools/ci_build/github/azure-pipelines/templates/py-packaging-selectable-stage.yml b/tools/ci_build/github/azure-pipelines/templates/py-packaging-selectable-stage.yml index 01cab936aa529..47b0d2188aa9f 100644 --- a/tools/ci_build/github/azure-pipelines/templates/py-packaging-selectable-stage.yml +++ b/tools/ci_build/github/azure-pipelines/templates/py-packaging-selectable-stage.yml @@ -27,7 +27,7 @@ parameters: - name: cpu_build_py_parameters displayName: 'Extra parameters to pass to build.py for CPU package.' type: string - default: '--use_openvino CPU_FP32' + default: '--use_openvino CPU' - name: gpu_build_py_parameters displayName: 'Extra parameters to pass to build.py for GPU package.'