From df47d755a798bb8822b7ef0e7c101937b86e094b Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sat, 13 Jan 2024 19:17:19 +0000 Subject: [PATCH 01/25] update --- .../core/session/provider_bridge_ort.cc | 123 +++++++++++++++++- onnxruntime/test/perftest/ort_test_session.cc | 2 + 2 files changed, 120 insertions(+), 5 deletions(-) diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index e2d46012c097b..3ae897ecab1a7 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -88,6 +88,10 @@ using IndexedSubGraph_MetaDef = IndexedSubGraph::MetaDef; #include "core/providers/cann/cann_provider_options.h" #include "core/providers/dnnl/dnnl_provider_options.h" +#ifdef USE_TENSORRT +#include "core/session/onnxruntime_session_options_config_keys.h" +#endif + // The filename extension for a shared library is different per platform #ifdef _WIN32 #define LIBRARY_PREFIX @@ -1365,10 +1369,6 @@ std::shared_ptr DnnlProviderFactoryCreator::Create(in return s_library_dnnl.Get().CreateExecutionProviderFactory(use_arena); } -std::shared_ptr TensorrtProviderFactoryCreator::Create(int device_id) { - return s_library_tensorrt.Get().CreateExecutionProviderFactory(device_id); -} - std::shared_ptr MIGraphXProviderFactoryCreator::Create(int device_id) { return s_library_migraphx.Get().CreateExecutionProviderFactory(device_id); } @@ -1416,6 +1416,95 @@ OrtTensorRTProviderOptionsV2 OrtTensorRTProviderOptionsToOrtTensorRTProviderOpti return trt_options_converted; } +// Get configs from session options that are needed for TensorRT EP +void UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(OrtSessionOptions* session_options, OrtTensorRTProviderOptionsV2* tensorrt_options) { + tensorrt_options->trt_dump_ep_context_model = 1; + std::string embed_mode = (session_options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEmbedMode, "1"); + if ("1" == embed_mode) { + tensorrt_options->trt_ep_context_embed_mode = 1; + } else if ("0" == embed_mode) { + tensorrt_options->trt_ep_context_embed_mode = 0; + } else { + LOGS_DEFAULT(VERBOSE) << "Invalid ep.context_embed_mode: " << embed_mode << " only 0 or 1 allowed. 
Set to 1."; + } + LOGS_DEFAULT(VERBOSE) << "User specified context cache embed mode: " << tensorrt_options->trt_ep_context_embed_mode; +} + +void CopyOrtTensorRTProviderOptionsV2(OrtTensorRTProviderOptionsV2* dst, const OrtTensorRTProviderOptionsV2* src, bool string_copy) { + if (src == nullptr) { + return; + } + auto copy_string_if_needed = [&](std::string s_in) { + if (string_copy) { + char* dest = nullptr; + auto str_size = s_in.size(); + if (str_size == 0) { + return (const char*)nullptr; + } else { + dest = new char[str_size + 1]; +#ifdef _MSC_VER + strncpy_s(dest, str_size + 1, s_in.c_str(), str_size); +#else + strncpy(dest, s_in.c_str(), str_size); +#endif + dest[str_size] = '\0'; + return (const char*)dest; + } + } else { + return s_in.c_str(); + } + }; + + dst->device_id = src->device_id; + dst->has_user_compute_stream = src->has_user_compute_stream; + dst->trt_max_partition_iterations = src->trt_max_partition_iterations; + dst->trt_min_subgraph_size = src->trt_min_subgraph_size; + dst->trt_max_workspace_size = src->trt_max_workspace_size; + dst->trt_fp16_enable = src->trt_fp16_enable; + dst->trt_int8_enable = src->trt_int8_enable; + + dst->trt_int8_calibration_table_name = copy_string_if_needed(src->trt_int8_calibration_table_name); + + dst->trt_int8_use_native_calibration_table = src->trt_int8_use_native_calibration_table; + dst->trt_dla_enable = src->trt_dla_enable; + dst->trt_dla_core = src->trt_dla_core; + dst->trt_dump_subgraphs = src->trt_dump_subgraphs; + dst->trt_engine_cache_enable = src->trt_engine_cache_enable; + + dst->trt_engine_cache_path = copy_string_if_needed(src->trt_engine_cache_path); + dst->trt_timing_cache_path = copy_string_if_needed(src->trt_timing_cache_path); + + dst->trt_engine_decryption_enable = src->trt_engine_decryption_enable; + + dst->trt_engine_decryption_lib_path = copy_string_if_needed(src->trt_engine_decryption_lib_path); + + dst->trt_force_sequential_engine_build = src->trt_force_sequential_engine_build; + dst->trt_context_memory_sharing_enable = src->trt_context_memory_sharing_enable; + dst->trt_layer_norm_fp32_fallback = src->trt_layer_norm_fp32_fallback; + dst->trt_timing_cache_enable = src->trt_timing_cache_enable; + dst->trt_force_timing_cache = src->trt_force_timing_cache; + dst->trt_detailed_build_log = src->trt_detailed_build_log; + dst->trt_build_heuristics_enable = src->trt_build_heuristics_enable; + dst->trt_sparsity_enable = src->trt_sparsity_enable; + dst->trt_builder_optimization_level = src->trt_builder_optimization_level; + dst->trt_auxiliary_streams = src->trt_auxiliary_streams; + + dst->trt_tactic_sources = copy_string_if_needed(src->trt_tactic_sources); + dst->trt_extra_plugin_lib_paths = copy_string_if_needed(src->trt_extra_plugin_lib_paths); + dst->trt_profile_min_shapes = copy_string_if_needed(src->trt_profile_min_shapes); + dst->trt_profile_max_shapes = copy_string_if_needed(src->trt_profile_max_shapes); + dst->trt_profile_opt_shapes = copy_string_if_needed(src->trt_profile_opt_shapes); + + dst->trt_cuda_graph_enable = src->trt_cuda_graph_enable; + dst->trt_dump_ep_context_model = src->trt_dump_ep_context_model; + dst->trt_ep_context_embed_mode = src->trt_ep_context_embed_mode; + dst->trt_ep_context_compute_capability_enable = src->trt_ep_context_compute_capability_enable; +} + +std::shared_ptr TensorrtProviderFactoryCreator::Create(int device_id) { + return s_library_tensorrt.Get().CreateExecutionProviderFactory(device_id); +} + std::shared_ptr TensorrtProviderFactoryCreator::Create(const OrtTensorRTProviderOptions* 
provider_options) { OrtTensorRTProviderOptionsV2 trt_options_converted = onnxruntime::OrtTensorRTProviderOptionsToOrtTensorRTProviderOptionsV2(provider_options); return s_library_tensorrt.Get().CreateExecutionProviderFactory(&trt_options_converted); @@ -1800,7 +1889,31 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_ROCM, _In_ Or ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT_V2, _In_ OrtSessionOptions* options, _In_ const OrtTensorRTProviderOptionsV2* tensorrt_options) { API_IMPL_BEGIN - auto factory = onnxruntime::TensorrtProviderFactoryCreator::Create(tensorrt_options); + + std::shared_ptr factory; + + auto ep_context_cache_enabled_from_provider_options = tensorrt_options->trt_dump_ep_context_model != 0; + auto ep_context_cache_enabled_from_sess_options = (options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEnable, "0") == "1"; + + // If EP context configs are provided in session options, we need to propagate them to provider options. + // However, if provider options already have the EP context configs provided, the configs in session options + // will be ignored since provider options has higher priority than session options. + if (!ep_context_cache_enabled_from_provider_options && ep_context_cache_enabled_from_sess_options) { + // We need to create another provider V2 object since the tensorrt_options points to the "const" object that can't be updated. + OrtTensorRTProviderOptionsV2* new_tensorrt_options = nullptr; + if (OrtApis::CreateTensorRTProviderOptions(&new_tensorrt_options) != nullptr) { + ORT_THROW("Can't create an OrtProviderOptionsV2 object."); + } + auto deleter = [](OrtTensorRTProviderOptionsV2* ptr) { OrtApis::ReleaseTensorRTProviderOptions(ptr); }; + std::unique_ptr rel_trt_options(new_tensorrt_options, deleter); + + onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(options, new_tensorrt_options); + onnxruntime::CopyOrtTensorRTProviderOptionsV2(new_tensorrt_options, tensorrt_options, true); + factory = onnxruntime::TensorrtProviderFactoryCreator::Create(new_tensorrt_options); + } else { + factory = onnxruntime::TensorrtProviderFactoryCreator::Create(tensorrt_options); + } + if (!factory) { return OrtApis::CreateStatus(ORT_FAIL, "OrtSessionOptionsAppendExecutionProvider_TensorRT: Failed to load shared library"); } diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 13082fe69cf48..a3371b390b4d0 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -634,6 +634,8 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. 
\n)"); session_options.AddConfigEntry(kOrtSessionOptionsConfigIntraOpThreadAffinities, performance_test_config.run_config.intra_op_thread_affinities.c_str()); } + session_options.AddConfigEntry(kOrtSessionOptionEpContextEnable, "1"); + if (performance_test_config.run_config.disable_spinning) { fprintf(stdout, "Disabling intra-op thread spinning entirely\n"); session_options.AddConfigEntry(kOrtSessionOptionsConfigAllowIntraOpSpinning, "0"); From 3b8c9baa316a122a95d90aea14f6ff6bf916005e Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 15 Jan 2024 20:23:35 +0000 Subject: [PATCH 02/25] update --- .../tensorrt/tensorrt_provider_options.h | 5 +- .../onnxruntime_session_options_config_keys.h | 7 +- .../core/graph/contrib_ops/contrib_defs.cc | 2 +- .../tensorrt/onnx_ctx_model_helper.cc | 81 ++++++++- .../tensorrt/onnx_ctx_model_helper.h | 5 +- .../tensorrt/tensorrt_execution_provider.cc | 30 +++- .../tensorrt/tensorrt_execution_provider.h | 4 +- .../tensorrt_execution_provider_info.cc | 8 +- .../tensorrt_execution_provider_info.h | 3 +- .../tensorrt/tensorrt_provider_factory.cc | 8 +- .../tensorrt_provider_factory_creator.h | 2 + .../core/session/provider_bridge_ort.cc | 155 +++++++----------- 12 files changed, 191 insertions(+), 119 deletions(-) diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h index 60196d0c80cbb..b1a751c3468e4 100644 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h @@ -11,6 +11,8 @@ /// User can only get the instance of OrtTensorRTProviderOptionsV2 via CreateTensorRTProviderOptions. /// struct OrtTensorRTProviderOptionsV2 { + OrtTensorRTProviderOptionsV2& operator=(const OrtTensorRTProviderOptionsV2& other); // copy assignment operator + int device_id{0}; // cuda device id. int has_user_compute_stream{0}; // indicator of user specified CUDA compute stream. void* user_compute_stream{nullptr}; // user specified CUDA compute stream. @@ -47,7 +49,8 @@ struct OrtTensorRTProviderOptionsV2 { const char* trt_profile_opt_shapes{nullptr}; // Specify the range of the input shapes to build the engine with int trt_cuda_graph_enable{0}; // Enable CUDA graph in ORT TRT int trt_dump_ep_context_model{0}; // Dump EP context node model + const char* trt_ep_context_file_path{nullptr}; // Specify file name to dump EP context node model. int trt_ep_context_embed_mode{0}; // Specify EP context embed mode. Default 0 = context is engine cache path, 1 = context is engine binary data - int trt_ep_context_compute_capability_enable{1}; // Add GPU compute capability as an EP context node's attribute + int trt_ep_context_compute_capability_enable{0}; // Add GPU compute capability as an EP context node's attribute const char* trt_engine_cache_prefix{nullptr}; // specify engine cache prefix }; diff --git a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h index df79cb6e5b21b..104e024c43405 100644 --- a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h +++ b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h @@ -249,4 +249,9 @@ static const char* const kOrtSessionOptionEpContextFilePath = "ep.context_file_p // Flag to specify whether to dump the EP context into the Onnx model. 
// "0": dump the EP context into separate file, keep the file name in the Onnx model. // "1": dump the EP context into the Onnx model. (default). -static const char* const kOrtSessionOptionEpContextEmbedMode = "ep.context_embed_mode"; \ No newline at end of file +static const char* const kOrtSessionOptionEpContextEmbedMode = "ep.context_embed_mode"; + +// Enable to check whether the hardware architecture matches the EP context node's "hardware_architecture" attribute. +// "0": disable. (default) +// "1": enable. +static const char* const kOrtSessionOptionEpContextHardwareArchitectureEnable = "ep.context_hardware_architecture_enable"; \ No newline at end of file diff --git a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc index 982e8fd834b76..68ded671d7ac8 100644 --- a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc +++ b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc @@ -3232,7 +3232,7 @@ void RegisterContribSchemas() { OPTIONAL_VALUE) .Attr( "hardware_architecture", - "(Optional) Hardware architecture.", + "(Optional) Hardware architecture for running this EP context node.", AttributeProto::STRING, OPTIONAL_VALUE) .Attr( diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc index 4d8ba6a0891e3..661bfd6603879 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc @@ -137,15 +137,90 @@ ONNX_NAMESPACE::ModelProto* CreateCtxNodeModel(const GraphViewer& graph_viewer, return model_proto.release(); } +/* + * Get "EP context node" model path + * + * + * If ep_context_file_path is provided: + * - If ep_context_file_path is a file: + * - If it's a file name without any path associated with it, return "engine_cache_path/ep_context_file_path". + - If it's a file name with path associated with it, return "ep_context_file_path". + * - If ep_context_file_path is a directory, return "ep_context_file_path/original_model_name_ctx.onnx". + * If ep_context_file_path is not provided: + * - Return "engine_cache_path/original_model_name_ctx.onnx". 
+ * + * + * Example 1: + * ep_context_file_path = "/home/user/ep_context_model_foler" + * engine_cache_path = "trt_engine.engine" + * original_model_path = "model.onnx" + * => return "/home/user/ep_context_model_folder/model_ctx.onnx" + * + * Example 2: + * ep_context_file_path = "my_ctx_model.onnx" + * engine_cache_path = "/home/user/cache_folder/trt_engine.engine" + * original_model_path = "model.onnx" + * => return "/home/user/cache_folder/my_ctx_model.onnx" + * + * Example 3: + * ep_context_file_path = "/home/user2/ep_context_model_foler/my_ctx_model.onnx" + * engine_cache_path = "trt_engine.engine" + * original_model_path = "model.onnx" + * => return "/home/user2/ep_context_model_foler/my_ctx_model.onnx" + * + * Example 4: + * ep_context_file_path = "" + * engine_cache_path = "/home/user3/cache_folder/trt_engine.engine" + * original_model_path = "model.onnx" + * => return "/home/user3/cache_folder/model_ctx.onnx" + * + */ +std::string GetCtxNodeModelPath(const std::string& ep_context_file_path, + const std::string& engine_cache_path, + const std::string& original_model_path) { + std::string ctx_model_path; + + if (!ep_context_file_path.empty() && !std::filesystem::is_directory(ep_context_file_path)) { + std::filesystem::path ctx_model_file_path = ep_context_file_path; + if (ctx_model_file_path.filename().string() == ep_context_file_path) { + std::filesystem::path cache_path = engine_cache_path; + if (cache_path.has_parent_path()) { + ctx_model_path = cache_path.parent_path().append(ep_context_file_path).string(); + } else { + ctx_model_path = ep_context_file_path; + } + } else { + ctx_model_path = ep_context_file_path; + } + } else { + std::filesystem::path model_path = original_model_path; + std::filesystem::path model_name_stem = model_path.stem(); // model_name.onnx -> model_name + std::string ctx_model_name = model_name_stem.string() + "_ctx.onnx"; + + if (std::filesystem::is_directory(ep_context_file_path)) { + std::filesystem::path model_directory = ep_context_file_path; + ctx_model_path = model_directory.append(ctx_model_name).string(); + } else { + std::filesystem::path cache_path = engine_cache_path; + if (cache_path.has_parent_path()) { + ctx_model_path = cache_path.parent_path().append(ctx_model_name).string(); + } else { + ctx_model_path = ctx_model_name; + } + } + } + return ctx_model_path; +} + /* * Dump "EP context node" model * */ void DumpCtxNodeModel(ONNX_NAMESPACE::ModelProto* model_proto, - const std::string engine_cache_path) { - std::fstream dump(engine_cache_path + "_wrapper.onnx", std::ios::out | std::ios::trunc | std::ios::binary); + const std::string& ctx_model_path) { + std::fstream dump(ctx_model_path, std::ios::out | std::ios::trunc | std::ios::binary); model_proto->SerializeToOstream(dump); - LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Serialized " + engine_cache_path + "_wrapper.onnx"; + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Dumped " + ctx_model_path; } Status TensorRTCacheModelHandler::GetEpContextFromGraph(const GraphViewer& graph_viewer) { diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h index ab6ea733adfa1..50f235740932c 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h @@ -28,8 +28,11 @@ ONNX_NAMESPACE::ModelProto* CreateCtxNodeModel(const GraphViewer& graph_viewer, bool compute_capability_enable, std::string compute_capability, const logging::Logger* logger); +std::string 
GetCtxNodeModelPath(const std::string& ep_context_file_path, + const std::string& engine_cache_path, + const std::string& original_model_path); void DumpCtxNodeModel(ONNX_NAMESPACE::ModelProto* model_proto, - const std::string engine_cache_path); + const std::string& ctx_model_path); void UpdateCtxNodeModelEngineContext(ONNX_NAMESPACE::ModelProto* model_proto, char* engine_data, size_t size); diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index aa02d8384afa6..88c7cce140ae3 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -1381,6 +1381,7 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv profile_opt_shapes = info.profile_opt_shapes; cuda_graph_enable_ = info.cuda_graph_enable; dump_ep_context_model_ = info.dump_ep_context_model; + ep_context_file_path_ = info.ep_context_file_path; ep_context_embed_mode_ = info.ep_context_embed_mode; ep_context_compute_capability_enable_ = info.ep_context_compute_capability_enable; } else { @@ -1543,6 +1544,11 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv dump_ep_context_model_ = (std::stoi(dump_ep_context_model_env) == 0 ? false : true); } + const std::string ep_context_file_path_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kEpContextComputeCapabilityEnable); + if (!ep_context_file_path_env.empty()) { + ep_context_file_path_ = ep_context_file_path_env; + } + const std::string ep_context_embed_mode_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kEpContextEmbedMode); if (!ep_context_embed_mode_env.empty()) { ep_context_embed_mode_ = std::stoi(ep_context_embed_mode_env); @@ -1580,7 +1586,7 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv dla_core_ = 0; } - if (engine_cache_enable_ || int8_enable_ || timing_cache_enable_ || !cache_prefix_.empty()) { + if (engine_cache_enable_ || int8_enable_ || timing_cache_enable_) { if (!cache_path_.empty() && !fs::is_directory(cache_path_)) { if (!fs::create_directory(cache_path_)) { throw std::runtime_error("Failed to create directory " + cache_path_); @@ -1692,6 +1698,10 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv << ", trt_profile_max_shapes: " << profile_max_shapes << ", trt_profile_opt_shapes: " << profile_opt_shapes << ", trt_cuda_graph_enable: " << cuda_graph_enable_ + << ", trt_dump_ep_context_model: " << dump_ep_context_model_ + << ", trt_ep_context_file_path: " << ep_context_file_path_ + << ", trt_ep_context_embed_mode: " << ep_context_embed_mode_ + << ", trt_ep_context_compute_capability_enable: " << ep_context_compute_capability_enable_ << ", trt_cache_prefix: " << cache_prefix_; } @@ -2831,10 +2841,8 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView std::unique_ptr trt_engine; std::unique_ptr trt_context; - // Name the engine cache based on GPU compute capacity and reduce the chance of loading an incompatible cache - // Note: Engine cache generated on a GPU with large memory might not be loadable on a GPU with smaller memory, even if they share the same compute capacity - std::string cache_suffix = ""; std::string cache_path = ""; + std::string cache_suffix = ""; // Customize cache prefix if assigned if (!cache_prefix_.empty()) { // Generate cache suffix in case user would like to customize cache prefix @@ 
-2843,11 +2851,19 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView } else { cache_path = GetCachePath(cache_path_, trt_node_name_with_precision); } + + // Name the engine cache based on GPU compute capacity and reduce the chance of loading an incompatible cache + // Note: Engine cache generated on a GPU with large memory might not be loadable on a GPU with smaller memory, even if they share the same compute capacity const std::string cache_path_prefix = cache_path + "_sm" + compute_capability_; const std::string engine_cache_path = cache_path_prefix + ".engine"; const std::string encrypted_engine_cache_path = engine_cache_path + ".encrypted"; const std::string profile_cache_path = cache_path_prefix + ".profile"; + // Generate file name for dumping ep context model + if (dump_ep_context_model_ && ctx_model_path_.empty()) { + ctx_model_path_ = GetCtxNodeModelPath(ep_context_file_path_, engine_cache_path, model_path_); + } + if (!has_dynamic_shape) { std::string timing_cache_path = ""; bool engine_update = false; @@ -2992,7 +3008,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView ep_context_compute_capability_enable_, compute_capability_, GetLogger())}; - DumpCtxNodeModel(model_proto.get(), cache_path_prefix); + DumpCtxNodeModel(model_proto.get(), ctx_model_path_); } } } @@ -3061,7 +3077,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView compute_capability_, GetLogger())); if (ep_context_embed_mode_ == 0) { - DumpCtxNodeModel(model_proto_.get(), cache_path_prefix); + DumpCtxNodeModel(model_proto_.get(), ctx_model_path_); } } @@ -3382,7 +3398,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView // dump ep context model if (dump_ep_context_model_ && ep_context_embed_mode_) { UpdateCtxNodeModelEngineContext(model_proto_.get(), reinterpret_cast(serialized_engine->data()), serialized_engine->size()); - DumpCtxNodeModel(model_proto_.get(), cache_path_prefix); + DumpCtxNodeModel(model_proto_.get(), ctx_model_path_); } context_update = true; } diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h index 401a8da119ac2..7216a6da6839c 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h @@ -293,6 +293,7 @@ class TensorrtExecutionProvider : public IExecutionProvider { bool force_timing_cache_match_ = false; bool detailed_build_log_ = false; bool cuda_graph_enable_ = false; + std::string ctx_model_path_; std::string cache_prefix_; // The OrtAllocator object will be get during ep compute time @@ -301,8 +302,9 @@ class TensorrtExecutionProvider : public IExecutionProvider { // For create/dump EP context node model bool dump_ep_context_model_ = false; + std::string ep_context_file_path_; int ep_context_embed_mode_ = 0; - bool ep_context_compute_capability_enable_ = true; + bool ep_context_compute_capability_enable_ = false; std::unique_ptr model_proto_ = ONNX_NAMESPACE::ModelProto::Create(); std::unordered_set control_flow_op_set_ = {"If", "Loop", "Scan"}; diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc index 28f6e1720f615..1143af60486ea 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc +++ 
b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc @@ -47,8 +47,9 @@ constexpr const char* kProfilesMinShapes = "trt_profile_min_shapes"; constexpr const char* kProfilesMaxShapes = "trt_profile_max_shapes"; constexpr const char* kProfilesOptShapes = "trt_profile_opt_shapes"; constexpr const char* kCudaGraphEnable = "trt_cuda_graph_enable"; -constexpr const char* kDumpEpContextModel = "trt_dump_ep_context_model"; constexpr const char* kEpContextEmbedMode = "trt_ep_context_embed_mode"; +constexpr const char* kEpContextFilePath = "trt_ep_context_file_path"; +constexpr const char* kDumpEpContextModel = "trt_dump_ep_context_model"; constexpr const char* kEpContextComputeCapabilityEnable = "trt_ep_context_compute_capability_enable"; } // namespace provider_option_names } // namespace tensorrt @@ -103,6 +104,7 @@ TensorrtExecutionProviderInfo TensorrtExecutionProviderInfo::FromProviderOptions .AddAssignmentToReference(tensorrt::provider_option_names::kProfilesOptShapes, info.profile_opt_shapes) .AddAssignmentToReference(tensorrt::provider_option_names::kCudaGraphEnable, info.cuda_graph_enable) .AddAssignmentToReference(tensorrt::provider_option_names::kDumpEpContextModel, info.dump_ep_context_model) + .AddAssignmentToReference(tensorrt::provider_option_names::kEpContextFilePath, info.ep_context_file_path) .AddAssignmentToReference(tensorrt::provider_option_names::kEpContextEmbedMode, info.ep_context_embed_mode) .AddAssignmentToReference(tensorrt::provider_option_names::kEpContextComputeCapabilityEnable, info.ep_context_compute_capability_enable) .Parse(options)); // add new provider option here. @@ -148,6 +150,7 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const TensorrtE {tensorrt::provider_option_names::kProfilesOptShapes, MakeStringWithClassicLocale(info.profile_opt_shapes)}, {tensorrt::provider_option_names::kCudaGraphEnable, MakeStringWithClassicLocale(info.cuda_graph_enable)}, {tensorrt::provider_option_names::kDumpEpContextModel, MakeStringWithClassicLocale(info.dump_ep_context_model)}, + {tensorrt::provider_option_names::kEpContextFilePath, MakeStringWithClassicLocale(info.ep_context_file_path)}, {tensorrt::provider_option_names::kEpContextEmbedMode, MakeStringWithClassicLocale(info.ep_context_embed_mode)}, {tensorrt::provider_option_names::kEpContextComputeCapabilityEnable, MakeStringWithClassicLocale(info.ep_context_compute_capability_enable)}, }; @@ -166,6 +169,7 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const OrtTensor const std::string kProfilesMinShapes_ = empty_if_null(info.trt_profile_min_shapes); const std::string kProfilesMaxShapes_ = empty_if_null(info.trt_profile_max_shapes); const std::string kProfilesOptShapes_ = empty_if_null(info.trt_profile_opt_shapes); + const std::string kEpContextFilePath_ = empty_if_null(info.trt_ep_context_file_path); const ProviderOptions options{ {tensorrt::provider_option_names::kDeviceId, MakeStringWithClassicLocale(info.device_id)}, @@ -202,6 +206,7 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const OrtTensor {tensorrt::provider_option_names::kProfilesMaxShapes, kProfilesMaxShapes_}, {tensorrt::provider_option_names::kProfilesOptShapes, kProfilesOptShapes_}, {tensorrt::provider_option_names::kCudaGraphEnable, MakeStringWithClassicLocale(info.trt_cuda_graph_enable)}, + {tensorrt::provider_option_names::kEpContextFilePath, kEpContextFilePath_}, {tensorrt::provider_option_names::kDumpEpContextModel, 
MakeStringWithClassicLocale(info.trt_dump_ep_context_model)}, {tensorrt::provider_option_names::kEpContextEmbedMode, MakeStringWithClassicLocale(info.trt_ep_context_embed_mode)}, {tensorrt::provider_option_names::kEpContextComputeCapabilityEnable, MakeStringWithClassicLocale(info.trt_ep_context_compute_capability_enable)}, @@ -299,6 +304,7 @@ void TensorrtExecutionProviderInfo::UpdateProviderOptions(void* provider_options trt_provider_options_v2.trt_cuda_graph_enable = internal_options.cuda_graph_enable; trt_provider_options_v2.trt_dump_ep_context_model = internal_options.dump_ep_context_model; trt_provider_options_v2.trt_ep_context_embed_mode = internal_options.ep_context_embed_mode; + trt_provider_options_v2.trt_ep_context_file_path = copy_string_if_needed(internal_options.ep_context_file_path); trt_provider_options_v2.trt_ep_context_compute_capability_enable = internal_options.ep_context_compute_capability_enable; } } // namespace onnxruntime diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h index a133ef45affe8..2518bdd5337a0 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h @@ -52,8 +52,9 @@ struct TensorrtExecutionProviderInfo { std::string profile_opt_shapes{""}; bool cuda_graph_enable{false}; bool dump_ep_context_model{false}; + std::string ep_context_file_path{""}; int ep_context_embed_mode{0}; - bool ep_context_compute_capability_enable{1}; + bool ep_context_compute_capability_enable{0}; std::string engine_cache_prefix{""}; static TensorrtExecutionProviderInfo FromProviderOptions(const ProviderOptions& options); diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc index 62f124afbd1e5..722f24c3fd6ae 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc @@ -61,13 +61,6 @@ std::unique_ptr TensorrtProviderFactory::CreateProvider() { return std::make_unique(info_); } -std::shared_ptr TensorrtProviderFactoryCreator::Create(int device_id) { - TensorrtExecutionProviderInfo info; - info.device_id = device_id; - info.has_trt_options = false; - return std::make_shared(info); -} - struct Tensorrt_Provider : Provider { void* GetInfo() override { return &g_info; } std::shared_ptr CreateExecutionProviderFactory(int device_id) override { @@ -117,6 +110,7 @@ struct Tensorrt_Provider : Provider { info.profile_opt_shapes = options.trt_profile_opt_shapes == nullptr ? "" : options.trt_profile_opt_shapes; info.cuda_graph_enable = options.trt_cuda_graph_enable != 0; info.dump_ep_context_model = options.trt_dump_ep_context_model != 0; + info.ep_context_file_path = options.trt_ep_context_file_path == nullptr ? "" : options.trt_ep_context_file_path; info.ep_context_embed_mode = options.trt_ep_context_embed_mode; info.ep_context_compute_capability_enable = options.trt_ep_context_compute_capability_enable != 0; info.engine_cache_prefix = options.trt_engine_cache_prefix == nullptr ? 
"" : options.trt_engine_cache_prefix; diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory_creator.h b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory_creator.h index d905003fb7cc1..96917c8fb8e88 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory_creator.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory_creator.h @@ -15,6 +15,8 @@ namespace onnxruntime { struct TensorrtProviderFactoryCreator { static std::shared_ptr Create(int device_id); static std::shared_ptr Create(const OrtTensorRTProviderOptions* provider_options); + static std::shared_ptr Create(void* session_options, const OrtTensorRTProviderOptions* provider_options); static std::shared_ptr Create(const OrtTensorRTProviderOptionsV2* provider_options); + static std::shared_ptr Create(void* session_options, const OrtTensorRTProviderOptionsV2* provider_options); }; } // namespace onnxruntime diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 2f5ca1de62d66..784dd98fd952f 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1419,94 +1419,41 @@ OrtTensorRTProviderOptionsV2 OrtTensorRTProviderOptionsToOrtTensorRTProviderOpti trt_options_converted.trt_profile_max_shapes = ""; trt_options_converted.trt_profile_opt_shapes = ""; trt_options_converted.trt_cuda_graph_enable = 0; + trt_options_converted.trt_dump_ep_context_model = 0; + trt_options_converted.trt_ep_context_file_path = ""; + trt_options_converted.trt_ep_context_embed_mode = 0; + trt_options_converted.trt_ep_context_compute_capability_enable = 0; trt_options_converted.trt_engine_cache_prefix = ""; return trt_options_converted; } -// Get configs from session options that are needed for TensorRT EP +// Apply configs from session options to TensorRT provider options V2 that are needed for TensorRT EP. +// For example, EP context configs. void UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(OrtSessionOptions* session_options, OrtTensorRTProviderOptionsV2* tensorrt_options) { - tensorrt_options->trt_dump_ep_context_model = 1; - std::string embed_mode = (session_options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEmbedMode, "1"); - if ("1" == embed_mode) { - tensorrt_options->trt_ep_context_embed_mode = 1; - } else if ("0" == embed_mode) { - tensorrt_options->trt_ep_context_embed_mode = 0; - } else { - LOGS_DEFAULT(VERBOSE) << "Invalid ep.context_embed_mode: " << embed_mode << " only 0 or 1 allowed. 
Set to 1."; - } - LOGS_DEFAULT(VERBOSE) << "User specified context cache embed mode: " << tensorrt_options->trt_ep_context_embed_mode; -} - -void CopyOrtTensorRTProviderOptionsV2(OrtTensorRTProviderOptionsV2* dst, const OrtTensorRTProviderOptionsV2* src, bool string_copy) { - if (src == nullptr) { - return; - } - auto copy_string_if_needed = [&](std::string s_in) { - if (string_copy) { - char* dest = nullptr; - auto str_size = s_in.size(); - if (str_size == 0) { - return (const char*)nullptr; - } else { - dest = new char[str_size + 1]; -#ifdef _MSC_VER - strncpy_s(dest, str_size + 1, s_in.c_str(), str_size); -#else - strncpy(dest, s_in.c_str(), str_size); -#endif - dest[str_size] = '\0'; - return (const char*)dest; - } + if (session_options) { + auto context_cache_enabled = (session_options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEnable, "0") != "0"; + tensorrt_options->trt_dump_ep_context_model = context_cache_enabled; + LOGS_DEFAULT(VERBOSE) << "Context cache enable: " << context_cache_enabled; + + auto context_cache_path = (session_options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextFilePath, ""); + tensorrt_options->trt_ep_context_file_path = context_cache_path.c_str(); + LOGS_DEFAULT(VERBOSE) << "User specified context cache path: " << tensorrt_options->trt_ep_context_file_path; + + auto embed_mode = (session_options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEmbedMode, "1"); + if ("1" == embed_mode) { + tensorrt_options->trt_ep_context_embed_mode = 1; + } else if ("0" == embed_mode) { + tensorrt_options->trt_ep_context_embed_mode = 0; } else { - return s_in.c_str(); + LOGS_DEFAULT(VERBOSE) << "Invalid ep.context_embed_mode: " << embed_mode << " only 0 or 1 allowed. Set to 1."; } - }; - - dst->device_id = src->device_id; - dst->has_user_compute_stream = src->has_user_compute_stream; - dst->trt_max_partition_iterations = src->trt_max_partition_iterations; - dst->trt_min_subgraph_size = src->trt_min_subgraph_size; - dst->trt_max_workspace_size = src->trt_max_workspace_size; - dst->trt_fp16_enable = src->trt_fp16_enable; - dst->trt_int8_enable = src->trt_int8_enable; - - dst->trt_int8_calibration_table_name = copy_string_if_needed(src->trt_int8_calibration_table_name); - - dst->trt_int8_use_native_calibration_table = src->trt_int8_use_native_calibration_table; - dst->trt_dla_enable = src->trt_dla_enable; - dst->trt_dla_core = src->trt_dla_core; - dst->trt_dump_subgraphs = src->trt_dump_subgraphs; - dst->trt_engine_cache_enable = src->trt_engine_cache_enable; - - dst->trt_engine_cache_path = copy_string_if_needed(src->trt_engine_cache_path); - dst->trt_timing_cache_path = copy_string_if_needed(src->trt_timing_cache_path); - - dst->trt_engine_decryption_enable = src->trt_engine_decryption_enable; - - dst->trt_engine_decryption_lib_path = copy_string_if_needed(src->trt_engine_decryption_lib_path); - - dst->trt_force_sequential_engine_build = src->trt_force_sequential_engine_build; - dst->trt_context_memory_sharing_enable = src->trt_context_memory_sharing_enable; - dst->trt_layer_norm_fp32_fallback = src->trt_layer_norm_fp32_fallback; - dst->trt_timing_cache_enable = src->trt_timing_cache_enable; - dst->trt_force_timing_cache = src->trt_force_timing_cache; - dst->trt_detailed_build_log = src->trt_detailed_build_log; - dst->trt_build_heuristics_enable = src->trt_build_heuristics_enable; - dst->trt_sparsity_enable = src->trt_sparsity_enable; - dst->trt_builder_optimization_level = src->trt_builder_optimization_level; 
- dst->trt_auxiliary_streams = src->trt_auxiliary_streams; - - dst->trt_tactic_sources = copy_string_if_needed(src->trt_tactic_sources); - dst->trt_extra_plugin_lib_paths = copy_string_if_needed(src->trt_extra_plugin_lib_paths); - dst->trt_profile_min_shapes = copy_string_if_needed(src->trt_profile_min_shapes); - dst->trt_profile_max_shapes = copy_string_if_needed(src->trt_profile_max_shapes); - dst->trt_profile_opt_shapes = copy_string_if_needed(src->trt_profile_opt_shapes); + LOGS_DEFAULT(VERBOSE) << "User specified context cache embed mode: " << tensorrt_options->trt_ep_context_embed_mode; - dst->trt_cuda_graph_enable = src->trt_cuda_graph_enable; - dst->trt_dump_ep_context_model = src->trt_dump_ep_context_model; - dst->trt_ep_context_embed_mode = src->trt_ep_context_embed_mode; - dst->trt_ep_context_compute_capability_enable = src->trt_ep_context_compute_capability_enable; + auto context_hardware_arch_enable = (session_options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextHardwareArchitectureEnable, "0") != "0"; + tensorrt_options->trt_ep_context_compute_capability_enable = context_hardware_arch_enable; + LOGS_DEFAULT(VERBOSE) << "User specified context hardware architecture enable: " << tensorrt_options->trt_ep_context_compute_capability_enable; + } } std::shared_ptr TensorrtProviderFactoryCreator::Create(int device_id) { @@ -1518,10 +1465,26 @@ std::shared_ptr TensorrtProviderFactoryCreator::Creat return s_library_tensorrt.Get().CreateExecutionProviderFactory(&trt_options_converted); } +std::shared_ptr TensorrtProviderFactoryCreator::Create(void* session_options, const OrtTensorRTProviderOptions* provider_options) { + OrtTensorRTProviderOptionsV2 trt_options_converted = onnxruntime::OrtTensorRTProviderOptionsToOrtTensorRTProviderOptionsV2(provider_options); + onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(reinterpret_cast(session_options), &trt_options_converted); + return s_library_tensorrt.Get().CreateExecutionProviderFactory(&trt_options_converted); +} + std::shared_ptr TensorrtProviderFactoryCreator::Create(const OrtTensorRTProviderOptionsV2* provider_options) { return s_library_tensorrt.Get().CreateExecutionProviderFactory(provider_options); } +std::shared_ptr TensorrtProviderFactoryCreator::Create(void* session_options, const OrtTensorRTProviderOptionsV2* provider_options) { + // We need to create a new provider options V2 object and copy from provider_options, due to the "const" object pointed by provider_options can't be modified. + // + // Note: No need to worry about tensorrt_options being a local variable, CreateExecutionProviderFactory() in TRT EP will + // create a factory object that copies any provider options from tensorrt_options including "const char*" provider options. 
+ OrtTensorRTProviderOptionsV2 tensorrt_options = *provider_options; // copy and assign from provider_options + onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(reinterpret_cast(session_options), &tensorrt_options); + return s_library_tensorrt.Get().CreateExecutionProviderFactory(&tensorrt_options); +} + std::shared_ptr MIGraphXProviderFactoryCreator::Create(const OrtMIGraphXProviderOptions* provider_options) { return s_library_migraphx.Get().CreateExecutionProviderFactory(provider_options); } @@ -1797,7 +1760,18 @@ ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_MIGraphX, _In_ OrtS ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT, _In_ OrtSessionOptions* options, _In_ const OrtTensorRTProviderOptions* tensorrt_options) { API_IMPL_BEGIN - auto factory = onnxruntime::TensorrtProviderFactoryCreator::Create(tensorrt_options); + + std::shared_ptr factory; + + auto ep_context_cache_enabled_from_sess_options = (options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEnable, "0") != "0"; + + // If EP context configs are provided in session options, we need to propagate them to provider options + if (ep_context_cache_enabled_from_sess_options) { + factory = onnxruntime::TensorrtProviderFactoryCreator::Create(options, tensorrt_options); + } else { + factory = onnxruntime::TensorrtProviderFactoryCreator::Create(tensorrt_options); + } + if (!factory) { return OrtApis::CreateStatus(ORT_FAIL, "SessionOptionsAppendExecutionProvider_Tensorrt: Failed to load shared library"); } @@ -1938,23 +1912,13 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT_V2, std::shared_ptr factory; auto ep_context_cache_enabled_from_provider_options = tensorrt_options->trt_dump_ep_context_model != 0; - auto ep_context_cache_enabled_from_sess_options = (options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEnable, "0") == "1"; + auto ep_context_cache_enabled_from_sess_options = (options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEnable, "0") != "0"; - // If EP context configs are provided in session options, we need to propagate them to provider options. - // However, if provider options already have the EP context configs provided, the configs in session options - // will be ignored since provider options has higher priority than session options. + // If EP context configs are provided in session options, we need to propagate them to provider options. However, + // if provider options already have the EP context configs provided, the configs in session options will be ignored + // since provider options has higher priority than session options. if (!ep_context_cache_enabled_from_provider_options && ep_context_cache_enabled_from_sess_options) { - // We need to create another provider V2 object since the tensorrt_options points to the "const" object that can't be updated. 
- OrtTensorRTProviderOptionsV2* new_tensorrt_options = nullptr; - if (OrtApis::CreateTensorRTProviderOptions(&new_tensorrt_options) != nullptr) { - ORT_THROW("Can't create an OrtProviderOptionsV2 object."); - } - auto deleter = [](OrtTensorRTProviderOptionsV2* ptr) { OrtApis::ReleaseTensorRTProviderOptions(ptr); }; - std::unique_ptr rel_trt_options(new_tensorrt_options, deleter); - - onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(options, new_tensorrt_options); - onnxruntime::CopyOrtTensorRTProviderOptionsV2(new_tensorrt_options, tensorrt_options, true); - factory = onnxruntime::TensorrtProviderFactoryCreator::Create(new_tensorrt_options); + factory = onnxruntime::TensorrtProviderFactoryCreator::Create(options, tensorrt_options); } else { factory = onnxruntime::TensorrtProviderFactoryCreator::Create(tensorrt_options); } @@ -2104,6 +2068,7 @@ ORT_API(void, OrtApis::ReleaseTensorRTProviderOptions, _Frees_ptr_opt_ OrtTensor delete[] ptr->trt_profile_min_shapes; delete[] ptr->trt_profile_max_shapes; delete[] ptr->trt_profile_opt_shapes; + delete[] ptr->trt_ep_context_file_path; } std::unique_ptr p(ptr); From 55eca2e529daac9ea45facee795162da2bca3ae4 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 15 Jan 2024 21:30:54 +0000 Subject: [PATCH 03/25] update --- onnxruntime/core/graph/contrib_ops/contrib_defs.cc | 2 +- onnxruntime/python/onnxruntime_pybind_state.cc | 9 ++++++++- onnxruntime/test/perftest/ort_test_session.cc | 4 ++-- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc index 68ded671d7ac8..982e8fd834b76 100644 --- a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc +++ b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc @@ -3232,7 +3232,7 @@ void RegisterContribSchemas() { OPTIONAL_VALUE) .Attr( "hardware_architecture", - "(Optional) Hardware architecture for running this EP context node.", + "(Optional) Hardware architecture.", AttributeProto::STRING, OPTIONAL_VALUE) .Attr( diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index d2cd6140b838e..9ce3a9a5fa07d 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -475,7 +475,7 @@ std::unique_ptr CreateExecutionProviderInstance( // So we need these std::string variables defined here as they will be kept alive for the lifetime of TRT EP and we can still access them from OrtTensorRTProviderOptionsV2 instance. // (The reason is string copy is involved, for example params.trt_engine_cache_path = cache_path.c_str() and those std::string variable is referenced by OrtTensorRTProviderOptionsV2 instance // and TRT EP instance, so it won't be released.) - std::string calibration_table, cache_path, cache_prefix, timing_cache_path, lib_path, trt_tactic_sources, trt_extra_plugin_lib_paths, min_profile, max_profile, opt_profile; + std::string calibration_table, cache_path, cache_prefix, timing_cache_path, lib_path, trt_tactic_sources, trt_extra_plugin_lib_paths, min_profile, max_profile, opt_profile, ep_context_file_path; auto it = provider_options_map.find(type); if (it != provider_options_map.end()) { OrtTensorRTProviderOptionsV2 params; @@ -728,6 +728,13 @@ std::unique_ptr CreateExecutionProviderInstance( } else { ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dump_ep_context_model' should be 'True' or 'False'. 
Default value is 'False'.\n"); } + } else if (option.first == "trt_ep_context_file_path") { + if (!option.second.empty()) { + ep_context_file_path = option.second; + params.trt_ep_context_file_path = ep_context_file_path.c_str(); + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_ep_context_file_path' should be a string.\n"); + } } else if (option.first == "trt_ep_context_embed_mode") { if (!option.second.empty()) { params.trt_ep_context_embed_mode = std::stoi(option.second); diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 24bd550923b95..babd0786c99bb 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -46,6 +46,8 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device const TestModelInfo& m) : rand_engine_(rd()), input_names_(m.GetInputCount()), input_names_str_(m.GetInputCount()), input_length_(m.GetInputCount()) { Ort::SessionOptions session_options; + session_options.AddConfigEntry(kOrtSessionOptionEpContextEnable, "1"); + session_options.AddConfigEntry(kOrtSessionOptionEpContextFilePath, "E:\\"); provider_name_ = performance_test_config.machine_config.provider_type_name; if (provider_name_ == onnxruntime::kDnnlExecutionProvider) { #ifdef USE_DNNL @@ -634,8 +636,6 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); session_options.AddConfigEntry(kOrtSessionOptionsConfigIntraOpThreadAffinities, performance_test_config.run_config.intra_op_thread_affinities.c_str()); } - session_options.AddConfigEntry(kOrtSessionOptionEpContextEnable, "1"); - if (performance_test_config.run_config.disable_spinning) { fprintf(stdout, "Disabling intra-op thread spinning entirely\n"); session_options.AddConfigEntry(kOrtSessionOptionsConfigAllowIntraOpSpinning, "0"); From 7082994150822233fc517471fcf09f854eb3268e Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 15 Jan 2024 21:31:51 +0000 Subject: [PATCH 04/25] update --- onnxruntime/test/perftest/ort_test_session.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index babd0786c99bb..f8a012af5bb13 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -46,8 +46,6 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device const TestModelInfo& m) : rand_engine_(rd()), input_names_(m.GetInputCount()), input_names_str_(m.GetInputCount()), input_length_(m.GetInputCount()) { Ort::SessionOptions session_options; - session_options.AddConfigEntry(kOrtSessionOptionEpContextEnable, "1"); - session_options.AddConfigEntry(kOrtSessionOptionEpContextFilePath, "E:\\"); provider_name_ = performance_test_config.machine_config.provider_type_name; if (provider_name_ == onnxruntime::kDnnlExecutionProvider) { #ifdef USE_DNNL From 7b7a68298141ce83a3b1b28622974bb959331465 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 15 Jan 2024 22:44:00 +0000 Subject: [PATCH 05/25] add warning message --- .../core/providers/tensorrt/tensorrt_provider_options.h | 2 +- .../core/session/onnxruntime_session_options_config_keys.h | 3 ++- .../core/providers/tensorrt/onnx_ctx_model_helper.cc | 5 ++++- .../core/providers/tensorrt/onnx_ctx_model_helper.h | 7 ++++++- .../core/providers/tensorrt/tensorrt_execution_provider.cc | 2 +- 5 files changed, 14 insertions(+), 5 deletions(-) diff --git 
a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h
index b1a751c3468e4..2443fde022415 100644
--- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h
+++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h
@@ -51,6 +51,6 @@ struct OrtTensorRTProviderOptionsV2 {
   int trt_dump_ep_context_model{0};  // Dump EP context node model
   const char* trt_ep_context_file_path{nullptr};  // Specify file name to dump EP context node model.
   int trt_ep_context_embed_mode{0};  // Specify EP context embed mode. Default 0 = context is engine cache path, 1 = context is engine binary data
-  int trt_ep_context_compute_capability_enable{0};  // Add GPU compute capability as an EP context node's attribute
+  int trt_ep_context_compute_capability_enable{1};  // Add GPU compute capability as an EP context node's attribute and check it against the compute capability when running
   const char* trt_engine_cache_prefix{nullptr};  // specify engine cache prefix
 };
diff --git a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h
index 104e024c43405..9dafffc79c523 100644
--- a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h
+++ b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h
@@ -251,7 +251,8 @@ static const char* const kOrtSessionOptionEpContextFilePath = "ep.context_file_p
 // "1": dump the EP context into the Onnx model. (default).
 static const char* const kOrtSessionOptionEpContextEmbedMode = "ep.context_embed_mode";
 
-// Enable to check whether the hardware architecture matches the EP context node's "hardware_architecture" attribute.
+// Enable to dump the EP context node with "hardware_architecture" attribute and check this attribute against the
+// hardware architecture when inferencing.
 // "0": disable. (default)
 // "1": enable.
static const char* const kOrtSessionOptionEpContextHardwareArchitectureEnable = "ep.context_hardware_architecture_enable"; \ No newline at end of file diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc index 661bfd6603879..b7fbf60e304ff 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc @@ -107,6 +107,7 @@ ONNX_NAMESPACE::ModelProto* CreateCtxNodeModel(const GraphViewer& graph_viewer, engine_data_str.assign(engine_data, size); } attr_1->set_s(engine_data_str); + LOGS_DEFAULT(WARNING) << EPCONTEXT_WARNING; } else { attr_1->set_s(engine_cache_path); } @@ -269,7 +270,7 @@ bool TensorRTCacheModelHandler::ValidateEPCtxNode(const GraphViewer& graph_viewe auto& attrs = node->GetAttributes(); // Check hardware_architecture(compute_capability) if it's present as an attribute - if (attrs.count(COMPUTE_CAPABILITY) > 0) { + if (compute_capability_enable_ && attrs.count(COMPUTE_CAPABILITY) > 0) { std::string model_compute_capability = attrs.at(COMPUTE_CAPABILITY).s(); if (model_compute_capability != compute_capability_) { LOGS_DEFAULT(ERROR) << "The compute capability of the engine cache doesn't match with the GPU's compute capability"; @@ -297,6 +298,8 @@ bool TensorRTCacheModelHandler::ValidateEPCtxNode(const GraphViewer& graph_viewe return false; } } + } else if (embed_mode == 1) { + LOGS_DEFAULT(WARNING) << EPCONTEXT_WARNING; } } return true; diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h index 50f235740932c..897bf123f8596 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h @@ -16,6 +16,9 @@ static const std::string EMBED_MODE = "embed_mode"; static const std::string EP_CACHE_CONTEXT = "ep_cache_context"; static const std::string COMPUTE_CAPABILITY = "hardware_architecture"; static const std::string EPCONTEXT_OP_DOMAIN = "com.microsoft"; +static const std::string EPCONTEXT_WARNING = "It's suggested to set the ORT graph optimization level to 0 and \ + make \"embed_mode\" to 0 (\"ep_cache_context\" is the cache path)\ + for the best model loading time"; bool GraphHasCtxNode(const GraphViewer& graph_viewer); const onnxruntime::Path& GetModelPath(const GraphViewer& graph_viewer); @@ -41,7 +44,8 @@ class TensorRTCacheModelHandler { public: TensorRTCacheModelHandler(std::unique_ptr* trt_engine, nvinfer1::IRuntime* trt_runtime, - std::string compute_capability) : trt_engine_(trt_engine), trt_runtime_(trt_runtime), compute_capability_(compute_capability) { + std::string compute_capability, + bool compute_capability_enable = true) : trt_engine_(trt_engine), trt_runtime_(trt_runtime), compute_capability_(compute_capability), compute_capability_enable_(compute_capability_enable) { } ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(TensorRTCacheModelHandler); @@ -54,5 +58,6 @@ class TensorRTCacheModelHandler { nvinfer1::IRuntime* trt_runtime_; std::filesystem::path engine_cache_path_; std::string compute_capability_; + bool compute_capability_enable_; }; // TRTCacheModelHandler } // namespace onnxruntime diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index 88c7cce140ae3..a3d901d8dd14f 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ 
b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -3591,7 +3591,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromPrecompiledEngine(con std::unordered_map output_types; // TRT engine output name -> ORT output tensor type // Get engine binary data and deserialize it - auto trt_cache_model_handler = TensorRTCacheModelHandler(&trt_engine, runtime_.get(), compute_capability_); + auto trt_cache_model_handler = TensorRTCacheModelHandler(&trt_engine, runtime_.get(), compute_capability_, ep_context_compute_capability_enable_); auto status = trt_cache_model_handler.GetEpContextFromGraph(graph_body_viewer); if (status != Status::OK()) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, status.ErrorMessage()); From 69ed710e0a5f9790b8bc2feb5a7a7946b317f694 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 15 Jan 2024 22:56:46 +0000 Subject: [PATCH 06/25] support trt plugins for the script --- .../gen_trt_engine_wrapper_onnx_model.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/onnxruntime/python/tools/tensorrt/gen_trt_engine_wrapper_onnx_model.py b/onnxruntime/python/tools/tensorrt/gen_trt_engine_wrapper_onnx_model.py index 717a0816247e7..92e92699299d5 100644 --- a/onnxruntime/python/tools/tensorrt/gen_trt_engine_wrapper_onnx_model.py +++ b/onnxruntime/python/tools/tensorrt/gen_trt_engine_wrapper_onnx_model.py @@ -15,6 +15,7 @@ def __init__(self, args): engine_cache_path = args.trt_engine_cache_path self.model_name = args.model_name self.dynamic_dim_count = 0 + self.plugins = args.plugins # Get serialized engine from engine cache with open(engine_cache_path, "rb") as file: @@ -25,8 +26,15 @@ def __init__(self, args): else: ep_cache_context_content = engine_cache_path - # Deserialize an TRT engine logger = trt.Logger(trt.Logger.WARNING) + + # Enable TRT plugins + trt.init_libnvinfer_plugins(logger, "") + if len(self.plugins): + import ctypes + ctypes.CDLL(self.plugins) + + # Deserialize an TRT engine runtime = trt.Runtime(logger) engine = runtime.deserialize_cuda_engine(engine_buffer) num_bindings = engine.num_bindings @@ -165,6 +173,14 @@ def main(): default="trt_engine_wrapper.onnx", type=str, ) + parser.add_argument( + "--plugins", + help="List of plugin paths to load", + required=False, + default=[], + nargs="+", + type=str, + ) args = parser.parse_args() ctor = TensorRTEngineWrapperCreator(args) ctor.create_model() From 9111d135bc3431cd5eed210c0e8aeb5562f33418 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Tue, 16 Jan 2024 21:10:55 +0000 Subject: [PATCH 07/25] fix bug for minimal build --- onnxruntime/core/session/provider_bridge_ort.cc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 784dd98fd952f..943d0e0a0d277 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1428,6 +1428,7 @@ OrtTensorRTProviderOptionsV2 OrtTensorRTProviderOptionsToOrtTensorRTProviderOpti return trt_options_converted; } +#ifdef USE_TENSORRT // Apply configs from session options to TensorRT provider options V2 that are needed for TensorRT EP. // For example, EP context configs. 
void UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(OrtSessionOptions* session_options, OrtTensorRTProviderOptionsV2* tensorrt_options) { @@ -1455,6 +1456,7 @@ void UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(OrtSessionOptio LOGS_DEFAULT(VERBOSE) << "User specified context hardware architecture enable: " << tensorrt_options->trt_ep_context_compute_capability_enable; } } +#endif std::shared_ptr TensorrtProviderFactoryCreator::Create(int device_id) { return s_library_tensorrt.Get().CreateExecutionProviderFactory(device_id); @@ -1467,7 +1469,11 @@ std::shared_ptr TensorrtProviderFactoryCreator::Creat std::shared_ptr TensorrtProviderFactoryCreator::Create(void* session_options, const OrtTensorRTProviderOptions* provider_options) { OrtTensorRTProviderOptionsV2 trt_options_converted = onnxruntime::OrtTensorRTProviderOptionsToOrtTensorRTProviderOptionsV2(provider_options); +#ifdef USE_TENSORRT onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(reinterpret_cast(session_options), &trt_options_converted); +#else + ORT_UNUSED_PARAMETER(session_options); +#endif return s_library_tensorrt.Get().CreateExecutionProviderFactory(&trt_options_converted); } @@ -1481,7 +1487,11 @@ std::shared_ptr TensorrtProviderFactoryCreator::Creat // Note: No need to worry about tensorrt_options being a local variable, CreateExecutionProviderFactory() in TRT EP will // create a factory object that copies any provider options from tensorrt_options including "const char*" provider options. OrtTensorRTProviderOptionsV2 tensorrt_options = *provider_options; // copy and assign from provider_options +#ifdef USE_TENSORRT onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(reinterpret_cast(session_options), &tensorrt_options); +#else + ORT_UNUSED_PARAMETER(session_options); +#endif return s_library_tensorrt.Get().CreateExecutionProviderFactory(&tensorrt_options); } From f7fe5e7518d5a7487768bc732b7ad92af96facac Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Tue, 16 Jan 2024 21:55:57 +0000 Subject: [PATCH 08/25] fix bug for minimal build --- .../core/session/provider_bridge_ort.cc | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 943d0e0a0d277..f715a07acdcb7 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1428,7 +1428,7 @@ OrtTensorRTProviderOptionsV2 OrtTensorRTProviderOptionsToOrtTensorRTProviderOpti return trt_options_converted; } -#ifdef USE_TENSORRT +#if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) // Apply configs from session options to TensorRT provider options V2 that are needed for TensorRT EP. // For example, EP context configs. 
void UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(OrtSessionOptions* session_options, OrtTensorRTProviderOptionsV2* tensorrt_options) { @@ -1469,11 +1469,13 @@ std::shared_ptr TensorrtProviderFactoryCreator::Creat std::shared_ptr TensorrtProviderFactoryCreator::Create(void* session_options, const OrtTensorRTProviderOptions* provider_options) { OrtTensorRTProviderOptionsV2 trt_options_converted = onnxruntime::OrtTensorRTProviderOptionsToOrtTensorRTProviderOptionsV2(provider_options); -#ifdef USE_TENSORRT + +#if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(reinterpret_cast(session_options), &trt_options_converted); #else ORT_UNUSED_PARAMETER(session_options); #endif + return s_library_tensorrt.Get().CreateExecutionProviderFactory(&trt_options_converted); } @@ -1487,7 +1489,8 @@ std::shared_ptr TensorrtProviderFactoryCreator::Creat // Note: No need to worry about tensorrt_options being a local variable, CreateExecutionProviderFactory() in TRT EP will // create a factory object that copies any provider options from tensorrt_options including "const char*" provider options. OrtTensorRTProviderOptionsV2 tensorrt_options = *provider_options; // copy and assign from provider_options -#ifdef USE_TENSORRT + +#if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(reinterpret_cast(session_options), &tensorrt_options); #else ORT_UNUSED_PARAMETER(session_options); @@ -1773,7 +1776,11 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT, _In std::shared_ptr factory; +#if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) auto ep_context_cache_enabled_from_sess_options = (options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEnable, "0") != "0"; +#else + auto ep_context_cache_enabled_from_sess_options = false; +#endif // If EP context configs are provided in session options, we need to propagate them to provider options if (ep_context_cache_enabled_from_sess_options) { @@ -1921,8 +1928,13 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT_V2, std::shared_ptr factory; +#if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) auto ep_context_cache_enabled_from_provider_options = tensorrt_options->trt_dump_ep_context_model != 0; auto ep_context_cache_enabled_from_sess_options = (options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEnable, "0") != "0"; +#else + auto ep_context_cache_enabled_from_provider_options = false; + auto ep_context_cache_enabled_from_sess_options = false; +#endif // If EP context configs are provided in session options, we need to propagate them to provider options. 
However, // if provider options already have the EP context configs provided, the configs in session options will be ignored From ffffb51cfbf5575e783ded6bcc69a7abc329848e Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Wed, 17 Jan 2024 18:00:44 +0000 Subject: [PATCH 09/25] remove trt_ep_context_compute_capability_check and only show the warning if not matched --- .../providers/tensorrt/tensorrt_provider_options.h | 1 - .../session/onnxruntime_session_options_config_keys.h | 8 +------- .../core/providers/tensorrt/onnx_ctx_model_helper.cc | 11 +++++------ .../core/providers/tensorrt/onnx_ctx_model_helper.h | 4 +--- .../providers/tensorrt/tensorrt_execution_provider.cc | 6 ++---- .../providers/tensorrt/tensorrt_execution_provider.h | 1 - .../tensorrt/tensorrt_execution_provider_info.cc | 5 ----- .../tensorrt/tensorrt_execution_provider_info.h | 1 - .../providers/tensorrt/tensorrt_provider_factory.cc | 1 - onnxruntime/core/session/provider_bridge_ort.cc | 7 +------ onnxruntime/python/onnxruntime_pybind_state.cc | 8 -------- .../test/providers/tensorrt/tensorrt_basic_test.cc | 10 ++++++++++ 12 files changed, 20 insertions(+), 43 deletions(-) diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h index 2443fde022415..1d9af3f18d184 100644 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h @@ -51,6 +51,5 @@ struct OrtTensorRTProviderOptionsV2 { int trt_dump_ep_context_model{0}; // Dump EP context node model const char* trt_ep_context_file_path{nullptr}; // Specify file name to dump EP context node model. int trt_ep_context_embed_mode{0}; // Specify EP context embed mode. Default 0 = context is engine cache path, 1 = context is engine binary data - int trt_ep_context_compute_capability_enable{1}; // Add GPU compute capability as an EP context node's attribute and check it against the compute capability when running const char* trt_engine_cache_prefix{nullptr}; // specify engine cache prefix }; diff --git a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h index 9dafffc79c523..df79cb6e5b21b 100644 --- a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h +++ b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h @@ -249,10 +249,4 @@ static const char* const kOrtSessionOptionEpContextFilePath = "ep.context_file_p // Flag to specify whether to dump the EP context into the Onnx model. // "0": dump the EP context into separate file, keep the file name in the Onnx model. // "1": dump the EP context into the Onnx model. (default). -static const char* const kOrtSessionOptionEpContextEmbedMode = "ep.context_embed_mode"; - -// Enable to dump the EP context node with "hardware_architecture" attribute and check this attribute against the -// hardware architecture when inferencing. -// "0": disable. (default) -// "1": enable. 
-static const char* const kOrtSessionOptionEpContextHardwareArchitectureEnable = "ep.context_hardware_architecture_enable"; \ No newline at end of file +static const char* const kOrtSessionOptionEpContextEmbedMode = "ep.context_embed_mode"; \ No newline at end of file diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc index b7fbf60e304ff..0bf0cd2635f13 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc @@ -269,14 +269,13 @@ bool TensorRTCacheModelHandler::ValidateEPCtxNode(const GraphViewer& graph_viewe auto node = graph_viewer.GetNode(0); auto& attrs = node->GetAttributes(); - // Check hardware_architecture(compute_capability) if it's present as an attribute - if (compute_capability_enable_ && attrs.count(COMPUTE_CAPABILITY) > 0) { + // Show the warning if compute capability is not matched + if (attrs.count(COMPUTE_CAPABILITY) > 0) { std::string model_compute_capability = attrs.at(COMPUTE_CAPABILITY).s(); if (model_compute_capability != compute_capability_) { - LOGS_DEFAULT(ERROR) << "The compute capability of the engine cache doesn't match with the GPU's compute capability"; - LOGS_DEFAULT(ERROR) << "The compute capability of the engine cache: " << model_compute_capability; - LOGS_DEFAULT(ERROR) << "The compute capability of the GPU: " << compute_capability_; - return false; + LOGS_DEFAULT(WARNING) << "[TensorRT EP] Engine was compiled for a different compatibility level and might not work or perform suboptimal"; + LOGS_DEFAULT(WARNING) << "[TensorRT EP] The compute capability of the engine: " << model_compute_capability; + LOGS_DEFAULT(WARNING) << "[TensorRT EP] The compute capability of the GPU: " << compute_capability_; } } diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h index 897bf123f8596..90fbdb7537bec 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h @@ -44,8 +44,7 @@ class TensorRTCacheModelHandler { public: TensorRTCacheModelHandler(std::unique_ptr* trt_engine, nvinfer1::IRuntime* trt_runtime, - std::string compute_capability, - bool compute_capability_enable = true) : trt_engine_(trt_engine), trt_runtime_(trt_runtime), compute_capability_(compute_capability), compute_capability_enable_(compute_capability_enable) { + std::string compute_capability) : trt_engine_(trt_engine), trt_runtime_(trt_runtime), compute_capability_(compute_capability) { } ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(TensorRTCacheModelHandler); @@ -58,6 +57,5 @@ class TensorRTCacheModelHandler { nvinfer1::IRuntime* trt_runtime_; std::filesystem::path engine_cache_path_; std::string compute_capability_; - bool compute_capability_enable_; }; // TRTCacheModelHandler } // namespace onnxruntime diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index a3d901d8dd14f..bfa6a2cc40834 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -1079,8 +1079,6 @@ Status BindKernelOutput(Ort::KernelContext& ctx, char const* output_name, size_t output_index, size_t output_type, - std::vector>& scratch_buffers, - OrtAllocator* alloc, cudaStream_t stream) { auto allocator = 
allocator_map[output_name].get(); auto& shape = allocator->getOutputShape(); @@ -3537,7 +3535,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView if (index_iter != output_indexes.end()) { output_index = index_iter->second; } - auto status = BindKernelOutput(ctx, &mem_info, dds_output_allocator_map, output_name, output_index, output_type, scratch_buffers, alloc, stream); + auto status = BindKernelOutput(ctx, &mem_info, dds_output_allocator_map, output_name, output_index, output_type, stream); if (status != Status::OK()) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, status.ErrorMessage()); } @@ -3818,7 +3816,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromPrecompiledEngine(con if (index_iter != output_indexes.end()) { output_index = index_iter->second; } - auto status = BindKernelOutput(ctx, &mem_info, dds_output_allocator_map, output_name, output_index, output_type, scratch_buffers, alloc, stream); + auto status = BindKernelOutput(ctx, &mem_info, dds_output_allocator_map, output_name, output_index, output_type, stream); if (status != Status::OK()) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, status.ErrorMessage()); } diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h index 7216a6da6839c..86645fabd36d9 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h @@ -304,7 +304,6 @@ class TensorrtExecutionProvider : public IExecutionProvider { bool dump_ep_context_model_ = false; std::string ep_context_file_path_; int ep_context_embed_mode_ = 0; - bool ep_context_compute_capability_enable_ = false; std::unique_ptr model_proto_ = ONNX_NAMESPACE::ModelProto::Create(); std::unordered_set control_flow_op_set_ = {"If", "Loop", "Scan"}; diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc index 1143af60486ea..ba9251c71bced 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc @@ -50,7 +50,6 @@ constexpr const char* kCudaGraphEnable = "trt_cuda_graph_enable"; constexpr const char* kEpContextEmbedMode = "trt_ep_context_embed_mode"; constexpr const char* kEpContextFilePath = "trt_ep_context_file_path"; constexpr const char* kDumpEpContextModel = "trt_dump_ep_context_model"; -constexpr const char* kEpContextComputeCapabilityEnable = "trt_ep_context_compute_capability_enable"; } // namespace provider_option_names } // namespace tensorrt @@ -106,7 +105,6 @@ TensorrtExecutionProviderInfo TensorrtExecutionProviderInfo::FromProviderOptions .AddAssignmentToReference(tensorrt::provider_option_names::kDumpEpContextModel, info.dump_ep_context_model) .AddAssignmentToReference(tensorrt::provider_option_names::kEpContextFilePath, info.ep_context_file_path) .AddAssignmentToReference(tensorrt::provider_option_names::kEpContextEmbedMode, info.ep_context_embed_mode) - .AddAssignmentToReference(tensorrt::provider_option_names::kEpContextComputeCapabilityEnable, info.ep_context_compute_capability_enable) .Parse(options)); // add new provider option here. 
return info; @@ -152,7 +150,6 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const TensorrtE {tensorrt::provider_option_names::kDumpEpContextModel, MakeStringWithClassicLocale(info.dump_ep_context_model)}, {tensorrt::provider_option_names::kEpContextFilePath, MakeStringWithClassicLocale(info.ep_context_file_path)}, {tensorrt::provider_option_names::kEpContextEmbedMode, MakeStringWithClassicLocale(info.ep_context_embed_mode)}, - {tensorrt::provider_option_names::kEpContextComputeCapabilityEnable, MakeStringWithClassicLocale(info.ep_context_compute_capability_enable)}, }; return options; } @@ -209,7 +206,6 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const OrtTensor {tensorrt::provider_option_names::kEpContextFilePath, kEpContextFilePath_}, {tensorrt::provider_option_names::kDumpEpContextModel, MakeStringWithClassicLocale(info.trt_dump_ep_context_model)}, {tensorrt::provider_option_names::kEpContextEmbedMode, MakeStringWithClassicLocale(info.trt_ep_context_embed_mode)}, - {tensorrt::provider_option_names::kEpContextComputeCapabilityEnable, MakeStringWithClassicLocale(info.trt_ep_context_compute_capability_enable)}, }; return options; } @@ -305,6 +301,5 @@ void TensorrtExecutionProviderInfo::UpdateProviderOptions(void* provider_options trt_provider_options_v2.trt_dump_ep_context_model = internal_options.dump_ep_context_model; trt_provider_options_v2.trt_ep_context_embed_mode = internal_options.ep_context_embed_mode; trt_provider_options_v2.trt_ep_context_file_path = copy_string_if_needed(internal_options.ep_context_file_path); - trt_provider_options_v2.trt_ep_context_compute_capability_enable = internal_options.ep_context_compute_capability_enable; } } // namespace onnxruntime diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h index 2518bdd5337a0..80424b8d6d196 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h @@ -54,7 +54,6 @@ struct TensorrtExecutionProviderInfo { bool dump_ep_context_model{false}; std::string ep_context_file_path{""}; int ep_context_embed_mode{0}; - bool ep_context_compute_capability_enable{0}; std::string engine_cache_prefix{""}; static TensorrtExecutionProviderInfo FromProviderOptions(const ProviderOptions& options); diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc index 722f24c3fd6ae..568da57a50956 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc @@ -112,7 +112,6 @@ struct Tensorrt_Provider : Provider { info.dump_ep_context_model = options.trt_dump_ep_context_model != 0; info.ep_context_file_path = options.trt_ep_context_file_path == nullptr ? "" : options.trt_ep_context_file_path; info.ep_context_embed_mode = options.trt_ep_context_embed_mode; - info.ep_context_compute_capability_enable = options.trt_ep_context_compute_capability_enable != 0; info.engine_cache_prefix = options.trt_engine_cache_prefix == nullptr ? 
"" : options.trt_engine_cache_prefix; return std::make_shared(info); diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index f715a07acdcb7..4b6c2a491334f 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -89,7 +89,7 @@ using IndexedSubGraph_MetaDef = IndexedSubGraph::MetaDef; #include "core/providers/cann/cann_provider_options.h" #include "core/providers/dnnl/dnnl_provider_options.h" -#ifdef USE_TENSORRT +#if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) #include "core/session/onnxruntime_session_options_config_keys.h" #endif @@ -1422,7 +1422,6 @@ OrtTensorRTProviderOptionsV2 OrtTensorRTProviderOptionsToOrtTensorRTProviderOpti trt_options_converted.trt_dump_ep_context_model = 0; trt_options_converted.trt_ep_context_file_path = ""; trt_options_converted.trt_ep_context_embed_mode = 0; - trt_options_converted.trt_ep_context_compute_capability_enable = 0; trt_options_converted.trt_engine_cache_prefix = ""; return trt_options_converted; @@ -1450,10 +1449,6 @@ void UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(OrtSessionOptio LOGS_DEFAULT(VERBOSE) << "Invalid ep.context_embed_mode: " << embed_mode << " only 0 or 1 allowed. Set to 1."; } LOGS_DEFAULT(VERBOSE) << "User specified context cache embed mode: " << tensorrt_options->trt_ep_context_embed_mode; - - auto context_hardware_arch_enable = (session_options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextHardwareArchitectureEnable, "0") != "0"; - tensorrt_options->trt_ep_context_compute_capability_enable = context_hardware_arch_enable; - LOGS_DEFAULT(VERBOSE) << "User specified context hardware architecture enable: " << tensorrt_options->trt_ep_context_compute_capability_enable; } } #endif diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 9ce3a9a5fa07d..03ba54007b77e 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -741,14 +741,6 @@ std::unique_ptr CreateExecutionProviderInstance( } else { ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_ep_context_embed_mode' should be a positive integer number i.e. '1'.\n"); } - } else if (option.first == "trt_ep_context_compute_capability_enable") { - if (option.second == "True" || option.second == "true") { - params.trt_ep_context_compute_capability_enable = true; - } else if (option.second == "False" || option.second == "false") { - params.trt_ep_context_compute_capability_enable = false; - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_ep_context_compute_capability_enable' should be 'True' or 'False'. 
Default value is 'False'.\n"); - } } else { ORT_THROW("Invalid TensorRT EP option: ", option.first); } diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index 508739ae1d235..4e20fb976b49b 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -191,6 +191,8 @@ void RunWithOneSessionSingleThreadInference(std::string model_name, std::string OrtTensorRTProviderOptionsV2 params; params.trt_engine_cache_enable = 1; params.trt_engine_cache_prefix = "TRTEP_Cache_Test"; + params.trt_dump_ep_context_model = 1; + params.trt_ep_context_file_path = "EP_Context_model.onnx"; std::unique_ptr execution_provider = TensorrtExecutionProviderWithOptions(¶ms); EXPECT_TRUE(session_object.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); auto status = session_object.Load(model_name); @@ -209,6 +211,9 @@ void RunWithOneSessionSingleThreadInference(std::string model_name, std::string // Verify on cache with customized prefix ASSERT_TRUE(HasCacheFileWithPrefix(params.trt_engine_cache_prefix)); + + // Verify EP context model with user provided name + ASSERT_TRUE(HasCacheFileWithPrefix(params.trt_ep_context_file_path)); } void RunWithOneSessionMultiThreadsInference(std::string model_name, std::string sess_log_id, bool has_non_zero_node = false) { @@ -448,6 +453,8 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { params.trt_engine_cache_enable = 1; params.trt_engine_cache_prefix = "TRTEP_Cache_Test"; + params.trt_dump_ep_context_model = 1; + params.trt_ep_context_file_path = "EP_Context_model.onnx"; std::unique_ptr execution_provider = TensorrtExecutionProviderWithOptions(¶ms); EXPECT_TRUE(session_object.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); auto status = session_object.Load(model_name); @@ -576,6 +583,9 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { // Verify on cache with customized prefix ASSERT_TRUE(HasCacheFileWithPrefix(params.trt_engine_cache_prefix)); + // Verify EP context model with user provided name + ASSERT_TRUE(HasCacheFileWithPrefix(params.trt_ep_context_file_path)); + if (input_type.compare("static") == 0) { // Can't run inference since input shape changes but the engine is built with static input ASSERT_FALSE(status.IsOK()); From 2487933d869b39b9d427a34060664e8b8eb257b9 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Wed, 17 Jan 2024 18:33:46 +0000 Subject: [PATCH 10/25] remove trt_ep_context_compute_capability_check (cont.) 
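
[Editorial aside] The unit tests in this patch set trt_dump_ep_context_model and trt_ep_context_file_path directly on the options struct. From application code the same settings are normally reached through the string provider-option keys parsed in tensorrt_execution_provider_info.cc; the sketch below shows that route. It assumes the public C API entry points (CreateTensorRTProviderOptions, UpdateTensorRTProviderOptions) behave as in mainline ONNX Runtime and is illustrative rather than part of this patch.

```cpp
// Minimal sketch: request an EP-context model dump through the string
// provider-option keys (key names taken from tensorrt_execution_provider_info.cc).
#include "onnxruntime_cxx_api.h"

void AppendTrtWithEpContextDump(Ort::SessionOptions& so) {
  OrtTensorRTProviderOptionsV2* trt_options = nullptr;
  Ort::ThrowOnError(Ort::GetApi().CreateTensorRTProviderOptions(&trt_options));

  const char* keys[] = {"trt_engine_cache_enable",
                        "trt_dump_ep_context_model",
                        "trt_ep_context_file_path",
                        "trt_ep_context_embed_mode"};
  const char* values[] = {"1", "1", "EP_Context_model.onnx", "0"};
  Ort::ThrowOnError(Ort::GetApi().UpdateTensorRTProviderOptions(trt_options, keys, values, 4));

  // The TRT EP factory copies the options, so releasing them afterwards is safe.
  so.AppendExecutionProvider_TensorRT_V2(*trt_options);
  Ort::GetApi().ReleaseTensorRTProviderOptions(trt_options);
}
```
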
--- .../providers/tensorrt/onnx_ctx_model_helper.cc | 15 ++++++--------- .../providers/tensorrt/onnx_ctx_model_helper.h | 1 - .../tensorrt/tensorrt_execution_provider.cc | 11 +---------- .../providers/tensorrt/tensorrt_basic_test.cc | 7 +++++++ 4 files changed, 14 insertions(+), 20 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc index 0bf0cd2635f13..d91ceb4211d44 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc @@ -74,7 +74,6 @@ ONNX_NAMESPACE::ModelProto* CreateCtxNodeModel(const GraphViewer& graph_viewer, char* engine_data, size_t size, const int64_t embed_mode, - bool compute_capability_enable, std::string compute_capability, const logging::Logger* logger) { auto model_build = graph_viewer.CreateModel(*logger); @@ -111,18 +110,16 @@ ONNX_NAMESPACE::ModelProto* CreateCtxNodeModel(const GraphViewer& graph_viewer, } else { attr_1->set_s(engine_cache_path); } + attr_2->set_name(COMPUTE_CAPABILITY); + attr_2->set_type(onnx::AttributeProto_AttributeType_STRING); + attr_2->set_s(compute_capability); + auto node_attributes = ONNX_NAMESPACE::NodeAttributes::Create(); - int num_attributes = compute_capability_enable ? 3 : 2; + int num_attributes = 3; node_attributes->reserve(num_attributes); node_attributes->emplace(EMBED_MODE, *attr_0); node_attributes->emplace(EP_CACHE_CONTEXT, *attr_1); - - if (compute_capability_enable) { - attr_2->set_name(COMPUTE_CAPABILITY); - attr_2->set_type(onnx::AttributeProto_AttributeType_STRING); - attr_2->set_s(compute_capability); - node_attributes->emplace(COMPUTE_CAPABILITY, *attr_2); - } + node_attributes->emplace(COMPUTE_CAPABILITY, *attr_2); // Create EP context node graph_build.AddNode(EPCONTEXT_OP, EPCONTEXT_OP, "", inputs, outputs, node_attributes.get(), EPCONTEXT_OP_DOMAIN); diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h index 90fbdb7537bec..5c27300ef9d17 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h @@ -28,7 +28,6 @@ ONNX_NAMESPACE::ModelProto* CreateCtxNodeModel(const GraphViewer& graph_viewer, char* engine_data, size_t size, const int64_t embed_mode, - bool compute_capability_enable, std::string compute_capability, const logging::Logger* logger); std::string GetCtxNodeModelPath(const std::string& ep_context_file_path, diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index bfa6a2cc40834..a6b97a0c21d07 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -1381,7 +1381,6 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv dump_ep_context_model_ = info.dump_ep_context_model; ep_context_file_path_ = info.ep_context_file_path; ep_context_embed_mode_ = info.ep_context_embed_mode; - ep_context_compute_capability_enable_ = info.ep_context_compute_capability_enable; } else { try { const std::string max_partition_iterations_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kMaxPartitionIterations); @@ -1552,11 +1551,6 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv ep_context_embed_mode_ = 
std::stoi(ep_context_embed_mode_env); } - const std::string ep_context_compute_capability_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kEpContextComputeCapabilityEnable); - if (!ep_context_compute_capability_env.empty()) { - ep_context_compute_capability_enable_ = (std::stoi(ep_context_compute_capability_env) == 0 ? false : true); - } - } catch (const std::invalid_argument& ex) { LOGS_DEFAULT(WARNING) << "[TensorRT EP] Invalid Argument (from environment variables): " << ex.what(); } catch (const std::out_of_range& ex) { @@ -1699,7 +1693,6 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv << ", trt_dump_ep_context_model: " << dump_ep_context_model_ << ", trt_ep_context_file_path: " << ep_context_file_path_ << ", trt_ep_context_embed_mode: " << ep_context_embed_mode_ - << ", trt_ep_context_compute_capability_enable: " << ep_context_compute_capability_enable_ << ", trt_cache_prefix: " << cache_prefix_; } @@ -3003,7 +2996,6 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView reinterpret_cast(serialized_engine->data()), serialized_engine->size(), ep_context_embed_mode_, - ep_context_compute_capability_enable_, compute_capability_, GetLogger())}; DumpCtxNodeModel(model_proto.get(), ctx_model_path_); @@ -3071,7 +3063,6 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView nullptr, 0, ep_context_embed_mode_, - ep_context_compute_capability_enable_, compute_capability_, GetLogger())); if (ep_context_embed_mode_ == 0) { @@ -3589,7 +3580,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromPrecompiledEngine(con std::unordered_map output_types; // TRT engine output name -> ORT output tensor type // Get engine binary data and deserialize it - auto trt_cache_model_handler = TensorRTCacheModelHandler(&trt_engine, runtime_.get(), compute_capability_, ep_context_compute_capability_enable_); + auto trt_cache_model_handler = TensorRTCacheModelHandler(&trt_engine, runtime_.get(), compute_capability_); auto status = trt_cache_model_handler.GetEpContextFromGraph(graph_body_viewer); if (status != Status::OK()) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, status.ErrorMessage()); diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index 4e20fb976b49b..a1544b617b732 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -353,6 +353,13 @@ TEST(TensorrtExecutionProviderTest, TRTModelIdGeneratorUsingModelHashing) { ASSERT_EQ(model_hash, model_hash3) << "model 1&3 are same models and they have same hash, no matter where they are loaded"; } +TEST(TensorrtExecutionProviderTest, EPContextNode) { + std::string model_name = "trt_execution_provider_multithreading_test.onnx"; + std::string graph_name = "multithreading_test"; + std::string sess_log_id = "TRTEPMultiThreadingTestWithOneSessionMultiThreads"; + std::vector dims = {1, 3, 2}; +} + TEST(TensorrtExecutionProviderTest, TRTPluginsCustomOpTest) { std::string model_name = "testdata/trt_plugin_custom_op_test.onnx"; SessionOptions so; From 178e86fd60ebe48af6d12a7a368c6bb69bc66482 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Wed, 17 Jan 2024 22:39:47 +0000 Subject: [PATCH 11/25] add unit test --- .../providers/tensorrt/tensorrt_basic_test.cc | 96 +++++++++++++++++-- 1 file changed, 90 insertions(+), 6 deletions(-) diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc 
b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index a1544b617b732..dc860839cd147 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -122,9 +122,15 @@ void CreateBaseModel(std::string model_name, status = onnxruntime::Model::Save(model, model_name); } -bool HasCacheFileWithPrefix(const std::string& prefix) { - const std::filesystem::path current_dir = std::filesystem::current_path(); - for (const auto& entry : std::filesystem::directory_iterator(current_dir)) { +bool HasCacheFileWithPrefix(const std::string& prefix, std::string file_dir = "") { + std::filesystem::path target_dir; + if (file_dir.empty()) { + target_dir = std::filesystem::current_path(); + } else { + target_dir = std::filesystem::path(file_dir); + } + + for (const auto& entry : std::filesystem::directory_iterator(target_dir)) { if (entry.is_regular_file()) { std::string filename = entry.path().filename().string(); if (filename.rfind(prefix, 0) == 0) { @@ -354,10 +360,88 @@ TEST(TensorrtExecutionProviderTest, TRTModelIdGeneratorUsingModelHashing) { } TEST(TensorrtExecutionProviderTest, EPContextNode) { - std::string model_name = "trt_execution_provider_multithreading_test.onnx"; - std::string graph_name = "multithreading_test"; - std::string sess_log_id = "TRTEPMultiThreadingTestWithOneSessionMultiThreads"; + std::string model_name = "EPContextNode_test.onnx"; + std::string graph_name = "EPContextNode_test"; + std::string sess_log_id = "EPContextNode_test"; std::vector dims = {1, 3, 2}; + CreateBaseModel(model_name, graph_name, dims); + + SessionOptions so; + so.session_logid = sess_log_id; + RunOptions run_options; + run_options.run_tag = so.session_logid; + InferenceSession session_object{so, GetEnvironment()}; + auto cuda_provider = DefaultCudaExecutionProvider(); + auto cpu_allocator = cuda_provider->CreatePreferredAllocators()[1]; + std::vector dims_mul_x = {1, 3, 2}; + std::vector values_mul_x = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + OrtValue ml_value_x; + CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_x); + OrtValue ml_value_y; + CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_y); + OrtValue ml_value_z; + CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_z); + NameMLValMap feeds; + feeds.insert(std::make_pair("X", ml_value_x)); + feeds.insert(std::make_pair("Y", ml_value_y)); + feeds.insert(std::make_pair("Z", ml_value_z)); + + // prepare outputs + std::vector output_names; + output_names.push_back("M"); + + // prepare expected inputs and outputs + std::vector expected_dims_mul_m = {1, 3, 2}; + std::vector expected_values_mul_m = {3.0f, 6.0f, 9.0f, 12.0f, 15.0f, 18.0f}; + + // Test dumping EP context model to provided path + OrtTensorRTProviderOptionsV2 params; + params.trt_engine_cache_enable = 1; + params.trt_dump_ep_context_model = 1; + params.trt_ep_context_file_path = "EP_Context_model.onnx"; + std::unique_ptr execution_provider = TensorrtExecutionProviderWithOptions(¶ms); + EXPECT_TRUE(session_object.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); + auto status = session_object.Load(model_name); + ASSERT_TRUE(status.IsOK()); + status = session_object.Initialize(); + ASSERT_TRUE(status.IsOK()); + // "EP_Context_model.onnx" should be created + ASSERT_TRUE(HasCacheFileWithPrefix(params.trt_ep_context_file_path)); + + // Test dumping EP context model to provided path + InferenceSession session_object2{so, GetEnvironment()}; + 
OrtTensorRTProviderOptionsV2 params2; + params2.trt_engine_cache_enable = 1; + params2.trt_dump_ep_context_model = 1; + params2.trt_engine_cache_path = "./trt_engine_cache"; + params2.trt_ep_context_file_path = "EP_Context_model.onnx"; + execution_provider = TensorrtExecutionProviderWithOptions(¶ms2); + EXPECT_TRUE(session_object2.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); + status = session_object2.Load(model_name); + ASSERT_TRUE(status.IsOK()); + status = session_object2.Initialize(); + ASSERT_TRUE(status.IsOK()); + // "./trt_engine_cache/EP_Context_model.onnx" should be created + ASSERT_TRUE(HasCacheFileWithPrefix(params2.trt_ep_context_file_path, params2.trt_engine_cache_path)); + + // Test EP context model inference + InferenceSession session_object3{so, GetEnvironment()}; + OrtTensorRTProviderOptionsV2 params3; + model_name = "EP_Context_model.onnx"; + execution_provider = TensorrtExecutionProviderWithOptions(¶ms3); + EXPECT_TRUE(session_object3.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); + status = session_object3.Load(model_name); + ASSERT_TRUE(status.IsOK()); + status = session_object3.Initialize(); + ASSERT_TRUE(status.IsOK()); + // run inference + // TRT engine will be created and cached + // TRT profile will be created and cached only for dynamic input shape + // Data in profile, + // X: 1, 3, 3, 2, 2, 2 + // Y: 1, 3, 3, 2, 2, 2 + // Z: 1, 3, 3, 2, 2, 2 + RunSession(session_object3, run_options, feeds, output_names, expected_dims_mul_m, expected_values_mul_m); } TEST(TensorrtExecutionProviderTest, TRTPluginsCustomOpTest) { From a69758d7edebd62ca903ec77aea50905a68c2197 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Wed, 17 Jan 2024 22:41:41 +0000 Subject: [PATCH 12/25] lintrunner -a --- .../providers/tensorrt/onnx_ctx_model_helper.cc | 14 +++++++------- .../providers/tensorrt/onnx_ctx_model_helper.h | 3 ++- onnxruntime/core/session/provider_bridge_ort.cc | 6 +++--- onnxruntime/python/onnxruntime_pybind_state.cc | 12 ++++++------ .../tensorrt/gen_trt_engine_wrapper_onnx_model.py | 1 + .../test/providers/tensorrt/tensorrt_basic_test.cc | 2 +- 6 files changed, 20 insertions(+), 18 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc index d91ceb4211d44..232dbfd882017 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc @@ -137,8 +137,8 @@ ONNX_NAMESPACE::ModelProto* CreateCtxNodeModel(const GraphViewer& graph_viewer, /* * Get "EP context node" model path - * - * + * + * * If ep_context_file_path is provided: * - If ep_context_file_path is a file: * - If it's a file name without any path associated with it, return "engine_cache_path/ep_context_file_path". @@ -146,14 +146,14 @@ ONNX_NAMESPACE::ModelProto* CreateCtxNodeModel(const GraphViewer& graph_viewer, * - If ep_context_file_path is a directory, return "ep_context_file_path/original_model_name_ctx.onnx". * If ep_context_file_path is not provided: * - Return "engine_cache_path/original_model_name_ctx.onnx". 
- * - * + * + * * Example 1: * ep_context_file_path = "/home/user/ep_context_model_foler" * engine_cache_path = "trt_engine.engine" * original_model_path = "model.onnx" * => return "/home/user/ep_context_model_folder/model_ctx.onnx" - * + * * Example 2: * ep_context_file_path = "my_ctx_model.onnx" * engine_cache_path = "/home/user/cache_folder/trt_engine.engine" @@ -165,13 +165,13 @@ ONNX_NAMESPACE::ModelProto* CreateCtxNodeModel(const GraphViewer& graph_viewer, * engine_cache_path = "trt_engine.engine" * original_model_path = "model.onnx" * => return "/home/user2/ep_context_model_foler/my_ctx_model.onnx" - * + * * Example 4: * ep_context_file_path = "" * engine_cache_path = "/home/user3/cache_folder/trt_engine.engine" * original_model_path = "model.onnx" * => return "/home/user3/cache_folder/model_ctx.onnx" - * + * */ std::string GetCtxNodeModelPath(const std::string& ep_context_file_path, const std::string& engine_cache_path, diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h index 5c27300ef9d17..d4f53a1d532c1 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h @@ -16,7 +16,8 @@ static const std::string EMBED_MODE = "embed_mode"; static const std::string EP_CACHE_CONTEXT = "ep_cache_context"; static const std::string COMPUTE_CAPABILITY = "hardware_architecture"; static const std::string EPCONTEXT_OP_DOMAIN = "com.microsoft"; -static const std::string EPCONTEXT_WARNING = "It's suggested to set the ORT graph optimization level to 0 and \ +static const std::string EPCONTEXT_WARNING = + "It's suggested to set the ORT graph optimization level to 0 and \ make \"embed_mode\" to 0 (\"ep_cache_context\" is the cache path)\ for the best model loading time"; diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 4b6c2a491334f..9d26c13ce47a3 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1480,10 +1480,10 @@ std::shared_ptr TensorrtProviderFactoryCreator::Creat std::shared_ptr TensorrtProviderFactoryCreator::Create(void* session_options, const OrtTensorRTProviderOptionsV2* provider_options) { // We need to create a new provider options V2 object and copy from provider_options, due to the "const" object pointed by provider_options can't be modified. - // + // // Note: No need to worry about tensorrt_options being a local variable, CreateExecutionProviderFactory() in TRT EP will // create a factory object that copies any provider options from tensorrt_options including "const char*" provider options. 
- OrtTensorRTProviderOptionsV2 tensorrt_options = *provider_options; // copy and assign from provider_options + OrtTensorRTProviderOptionsV2 tensorrt_options = *provider_options; // copy and assign from provider_options #if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(reinterpret_cast(session_options), &tensorrt_options); @@ -1768,7 +1768,7 @@ ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_MIGraphX, _In_ OrtS ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT, _In_ OrtSessionOptions* options, _In_ const OrtTensorRTProviderOptions* tensorrt_options) { API_IMPL_BEGIN - + std::shared_ptr factory; #if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 03ba54007b77e..f7ed5520727db 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -729,12 +729,12 @@ std::unique_ptr CreateExecutionProviderInstance( ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dump_ep_context_model' should be 'True' or 'False'. Default value is 'False'.\n"); } } else if (option.first == "trt_ep_context_file_path") { - if (!option.second.empty()) { - ep_context_file_path = option.second; - params.trt_ep_context_file_path = ep_context_file_path.c_str(); - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_ep_context_file_path' should be a string.\n"); - } + if (!option.second.empty()) { + ep_context_file_path = option.second; + params.trt_ep_context_file_path = ep_context_file_path.c_str(); + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_ep_context_file_path' should be a string.\n"); + } } else if (option.first == "trt_ep_context_embed_mode") { if (!option.second.empty()) { params.trt_ep_context_embed_mode = std::stoi(option.second); diff --git a/onnxruntime/python/tools/tensorrt/gen_trt_engine_wrapper_onnx_model.py b/onnxruntime/python/tools/tensorrt/gen_trt_engine_wrapper_onnx_model.py index 92e92699299d5..b94c2cb76a635 100644 --- a/onnxruntime/python/tools/tensorrt/gen_trt_engine_wrapper_onnx_model.py +++ b/onnxruntime/python/tools/tensorrt/gen_trt_engine_wrapper_onnx_model.py @@ -32,6 +32,7 @@ def __init__(self, args): trt.init_libnvinfer_plugins(logger, "") if len(self.plugins): import ctypes + ctypes.CDLL(self.plugins) # Deserialize an TRT engine diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index dc860839cd147..a1d19ecbabdcf 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -123,7 +123,7 @@ void CreateBaseModel(std::string model_name, } bool HasCacheFileWithPrefix(const std::string& prefix, std::string file_dir = "") { - std::filesystem::path target_dir; + std::filesystem::path target_dir; if (file_dir.empty()) { target_dir = std::filesystem::current_path(); } else { From c14efe1ca1111fcf91ebdcfbab5251aa42fce1a0 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Thu, 18 Jan 2024 01:58:42 +0000 Subject: [PATCH 13/25] remove newly added two factory create functions --- .../tensorrt_provider_factory_creator.h | 2 - .../core/session/provider_bridge_ort.cc | 49 ++++++++----------- 2 files changed, 20 insertions(+), 31 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory_creator.h 
b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory_creator.h index 96917c8fb8e88..d905003fb7cc1 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory_creator.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory_creator.h @@ -15,8 +15,6 @@ namespace onnxruntime { struct TensorrtProviderFactoryCreator { static std::shared_ptr Create(int device_id); static std::shared_ptr Create(const OrtTensorRTProviderOptions* provider_options); - static std::shared_ptr Create(void* session_options, const OrtTensorRTProviderOptions* provider_options); static std::shared_ptr Create(const OrtTensorRTProviderOptionsV2* provider_options); - static std::shared_ptr Create(void* session_options, const OrtTensorRTProviderOptionsV2* provider_options); }; } // namespace onnxruntime diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 9d26c13ce47a3..d529c9312a4ab 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1462,37 +1462,10 @@ std::shared_ptr TensorrtProviderFactoryCreator::Creat return s_library_tensorrt.Get().CreateExecutionProviderFactory(&trt_options_converted); } -std::shared_ptr TensorrtProviderFactoryCreator::Create(void* session_options, const OrtTensorRTProviderOptions* provider_options) { - OrtTensorRTProviderOptionsV2 trt_options_converted = onnxruntime::OrtTensorRTProviderOptionsToOrtTensorRTProviderOptionsV2(provider_options); - -#if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) - onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(reinterpret_cast(session_options), &trt_options_converted); -#else - ORT_UNUSED_PARAMETER(session_options); -#endif - - return s_library_tensorrt.Get().CreateExecutionProviderFactory(&trt_options_converted); -} - std::shared_ptr TensorrtProviderFactoryCreator::Create(const OrtTensorRTProviderOptionsV2* provider_options) { return s_library_tensorrt.Get().CreateExecutionProviderFactory(provider_options); } -std::shared_ptr TensorrtProviderFactoryCreator::Create(void* session_options, const OrtTensorRTProviderOptionsV2* provider_options) { - // We need to create a new provider options V2 object and copy from provider_options, due to the "const" object pointed by provider_options can't be modified. - // - // Note: No need to worry about tensorrt_options being a local variable, CreateExecutionProviderFactory() in TRT EP will - // create a factory object that copies any provider options from tensorrt_options including "const char*" provider options. 
- OrtTensorRTProviderOptionsV2 tensorrt_options = *provider_options; // copy and assign from provider_options - -#if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) - onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(reinterpret_cast(session_options), &tensorrt_options); -#else - ORT_UNUSED_PARAMETER(session_options); -#endif - return s_library_tensorrt.Get().CreateExecutionProviderFactory(&tensorrt_options); -} - std::shared_ptr MIGraphXProviderFactoryCreator::Create(const OrtMIGraphXProviderOptions* provider_options) { return s_library_migraphx.Get().CreateExecutionProviderFactory(provider_options); } @@ -1779,7 +1752,14 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT, _In // If EP context configs are provided in session options, we need to propagate them to provider options if (ep_context_cache_enabled_from_sess_options) { - factory = onnxruntime::TensorrtProviderFactoryCreator::Create(options, tensorrt_options); + OrtTensorRTProviderOptionsV2 trt_options_converted = onnxruntime::OrtTensorRTProviderOptionsToOrtTensorRTProviderOptionsV2(tensorrt_options); + +#if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) + onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(options, &trt_options_converted); +#else + ORT_UNUSED_PARAMETER(session_options); +#endif + factory = onnxruntime::TensorrtProviderFactoryCreator::Create(&trt_options_converted); } else { factory = onnxruntime::TensorrtProviderFactoryCreator::Create(tensorrt_options); } @@ -1935,7 +1915,18 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT_V2, // if provider options already have the EP context configs provided, the configs in session options will be ignored // since provider options has higher priority than session options. if (!ep_context_cache_enabled_from_provider_options && ep_context_cache_enabled_from_sess_options) { - factory = onnxruntime::TensorrtProviderFactoryCreator::Create(options, tensorrt_options); + // We need to create a new provider options V2 object and copy from provider_options, due to the "const" object pointed by provider_options can't be modified. + // + // Note: No need to worry about tensorrt_options being a local variable, CreateExecutionProviderFactory() in TRT EP will + // create a factory object that copies any provider options from tensorrt_options including "const char*" provider options. 
+ OrtTensorRTProviderOptionsV2 new_tensorrt_options = *tensorrt_options; // copy and assign from tensorrt_options + +#if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) + onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(options, &new_tensorrt_options); +#else + ORT_UNUSED_PARAMETER(session_options); +#endif + factory = onnxruntime::TensorrtProviderFactoryCreator::Create(&new_tensorrt_options); } else { factory = onnxruntime::TensorrtProviderFactoryCreator::Create(tensorrt_options); } From f00d137872565cd854397e07f1c32ea544546cab Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Thu, 18 Jan 2024 17:00:40 +0000 Subject: [PATCH 14/25] fix compile error --- onnxruntime/core/session/provider_bridge_ort.cc | 9 --------- 1 file changed, 9 deletions(-) diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index d529c9312a4ab..5dcb3613946a7 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1754,11 +1754,7 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT, _In if (ep_context_cache_enabled_from_sess_options) { OrtTensorRTProviderOptionsV2 trt_options_converted = onnxruntime::OrtTensorRTProviderOptionsToOrtTensorRTProviderOptionsV2(tensorrt_options); -#if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(options, &trt_options_converted); -#else - ORT_UNUSED_PARAMETER(session_options); -#endif factory = onnxruntime::TensorrtProviderFactoryCreator::Create(&trt_options_converted); } else { factory = onnxruntime::TensorrtProviderFactoryCreator::Create(tensorrt_options); @@ -1916,16 +1912,11 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT_V2, // since provider options has higher priority than session options. if (!ep_context_cache_enabled_from_provider_options && ep_context_cache_enabled_from_sess_options) { // We need to create a new provider options V2 object and copy from provider_options, due to the "const" object pointed by provider_options can't be modified. - // // Note: No need to worry about tensorrt_options being a local variable, CreateExecutionProviderFactory() in TRT EP will // create a factory object that copies any provider options from tensorrt_options including "const char*" provider options. 
OrtTensorRTProviderOptionsV2 new_tensorrt_options = *tensorrt_options; // copy and assign from tensorrt_options -#if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(options, &new_tensorrt_options); -#else - ORT_UNUSED_PARAMETER(session_options); -#endif factory = onnxruntime::TensorrtProviderFactoryCreator::Create(&new_tensorrt_options); } else { factory = onnxruntime::TensorrtProviderFactoryCreator::Create(tensorrt_options); From 997913b34c04caae31b9e1b644067ce6ce744907 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Thu, 18 Jan 2024 19:34:12 +0000 Subject: [PATCH 15/25] fix compile error --- onnxruntime/core/session/provider_bridge_ort.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 5dcb3613946a7..1b3b7de7d89c5 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1746,10 +1746,6 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT, _In #if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) auto ep_context_cache_enabled_from_sess_options = (options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEnable, "0") != "0"; -#else - auto ep_context_cache_enabled_from_sess_options = false; -#endif - // If EP context configs are provided in session options, we need to propagate them to provider options if (ep_context_cache_enabled_from_sess_options) { OrtTensorRTProviderOptionsV2 trt_options_converted = onnxruntime::OrtTensorRTProviderOptionsToOrtTensorRTProviderOptionsV2(tensorrt_options); @@ -1759,6 +1755,11 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT, _In } else { factory = onnxruntime::TensorrtProviderFactoryCreator::Create(tensorrt_options); } +#else + factory = onnxruntime::TensorrtProviderFactoryCreator::Create(tensorrt_options); +#endif + + if (!factory) { return OrtApis::CreateStatus(ORT_FAIL, "SessionOptionsAppendExecutionProvider_Tensorrt: Failed to load shared library"); @@ -1902,10 +1903,6 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT_V2, #if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) auto ep_context_cache_enabled_from_provider_options = tensorrt_options->trt_dump_ep_context_model != 0; auto ep_context_cache_enabled_from_sess_options = (options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEnable, "0") != "0"; -#else - auto ep_context_cache_enabled_from_provider_options = false; - auto ep_context_cache_enabled_from_sess_options = false; -#endif // If EP context configs are provided in session options, we need to propagate them to provider options. 
However, // if provider options already have the EP context configs provided, the configs in session options will be ignored @@ -1921,6 +1918,9 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT_V2, } else { factory = onnxruntime::TensorrtProviderFactoryCreator::Create(tensorrt_options); } +#else + factory = onnxruntime::TensorrtProviderFactoryCreator::Create(tensorrt_options); +#endif if (!factory) { return OrtApis::CreateStatus(ORT_FAIL, "OrtSessionOptionsAppendExecutionProvider_TensorRT: Failed to load shared library"); From 3bc8e793d6ecdf10cf27ed47e556112773c9a19a Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 19 Jan 2024 06:59:02 +0000 Subject: [PATCH 16/25] Make 'ep_cache_context' node attribute only have restrictive path for security purpose --- .../tensorrt/onnx_ctx_model_helper.cc | 170 ++++++++++-------- .../tensorrt/onnx_ctx_model_helper.h | 18 +- .../tensorrt/tensorrt_execution_provider.cc | 72 +++++--- 3 files changed, 152 insertions(+), 108 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc index 232dbfd882017..1c64df344e475 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc @@ -38,13 +38,6 @@ const onnxruntime::Path& GetModelPath(const GraphViewer& graph_viewer) { return main_graph.ModelPath(); } -std::filesystem::path LocateEngineRelativeToPath(std::string engine_cache_path, const onnxruntime::Path& path) { - std::filesystem::path base_path(path.ToPathString()); - std::filesystem::path parent_path = base_path.parent_path(); - std::filesystem::path engine_path = parent_path.append(engine_cache_path); - return engine_path; -} - /* * Update ep_cache_context attribute of the EP context node with the given engine binary data */ @@ -69,7 +62,7 @@ void UpdateCtxNodeModelEngineContext(ONNX_NAMESPACE::ModelProto* model_proto, /* * Create "EP context node" model where engine information is embedded */ -ONNX_NAMESPACE::ModelProto* CreateCtxNodeModel(const GraphViewer& graph_viewer, +ONNX_NAMESPACE::ModelProto* CreateCtxModel(const GraphViewer& graph_viewer, const std::string engine_cache_path, char* engine_data, size_t size, @@ -136,60 +129,56 @@ ONNX_NAMESPACE::ModelProto* CreateCtxNodeModel(const GraphViewer& graph_viewer, } /* - * Get "EP context node" model path - * + * Return the directory where the ep context model locates + */ +std::filesystem::path GetPathOrParentPathOfCtxModel(const std::string& ep_context_file_path) { + if (ep_context_file_path.empty()) { + return std::filesystem::path(); + } + std::filesystem::path ctx_path(ep_context_file_path); + if (std::filesystem::is_directory(ep_context_file_path)) { + return ctx_path; + } else { + return ctx_path.parent_path(); + } +} + +/* + * Get "EP context" model path. * - * If ep_context_file_path is provided: - * - If ep_context_file_path is a file: - * - If it's a file name without any path associated with it, return "engine_cache_path/ep_context_file_path". - - If it's a file name with path associated with it, return "ep_context_file_path". + * Function logic: + * If ep_context_file_path is provided, + * - If ep_context_file_path is a file, return "ep_context_file_path". * - If ep_context_file_path is a directory, return "ep_context_file_path/original_model_name_ctx.onnx". - * If ep_context_file_path is not provided: - * - Return "engine_cache_path/original_model_name_ctx.onnx". 
+ * If ep_context_file_path is not provided, + * - Return "original_model_name_ctx.onnx". * + * TRT EP has rules about context model path and engine cache path (see tensorrt_execution_provider.cc): + * - If dump_ep_context_model_ and engine_cache_enabled_ is enabled, TRT EP will dump context model and save engine cache + * to the same directory provided by ep_context_file_path_. (i.e. engine_cache_path_ = ep_context_file_path_) * * Example 1: * ep_context_file_path = "/home/user/ep_context_model_foler" - * engine_cache_path = "trt_engine.engine" * original_model_path = "model.onnx" * => return "/home/user/ep_context_model_folder/model_ctx.onnx" * * Example 2: * ep_context_file_path = "my_ctx_model.onnx" - * engine_cache_path = "/home/user/cache_folder/trt_engine.engine" * original_model_path = "model.onnx" - * => return "/home/user/cache_folder/my_ctx_model.onnx" + * => return "my_ctx_model.onnx" * * Example 3: * ep_context_file_path = "/home/user2/ep_context_model_foler/my_ctx_model.onnx" - * engine_cache_path = "trt_engine.engine" * original_model_path = "model.onnx" * => return "/home/user2/ep_context_model_foler/my_ctx_model.onnx" * - * Example 4: - * ep_context_file_path = "" - * engine_cache_path = "/home/user3/cache_folder/trt_engine.engine" - * original_model_path = "model.onnx" - * => return "/home/user3/cache_folder/model_ctx.onnx" - * */ -std::string GetCtxNodeModelPath(const std::string& ep_context_file_path, - const std::string& engine_cache_path, - const std::string& original_model_path) { +std::string GetCtxModelPath(const std::string& ep_context_file_path, + const std::string& original_model_path) { std::string ctx_model_path; if (!ep_context_file_path.empty() && !std::filesystem::is_directory(ep_context_file_path)) { - std::filesystem::path ctx_model_file_path = ep_context_file_path; - if (ctx_model_file_path.filename().string() == ep_context_file_path) { - std::filesystem::path cache_path = engine_cache_path; - if (cache_path.has_parent_path()) { - ctx_model_path = cache_path.parent_path().append(ep_context_file_path).string(); - } else { - ctx_model_path = ep_context_file_path; - } - } else { - ctx_model_path = ep_context_file_path; - } + ctx_model_path = ep_context_file_path; } else { std::filesystem::path model_path = original_model_path; std::filesystem::path model_name_stem = model_path.stem(); // model_name.onnx -> model_name @@ -199,28 +188,54 @@ std::string GetCtxNodeModelPath(const std::string& ep_context_file_path, std::filesystem::path model_directory = ep_context_file_path; ctx_model_path = model_directory.append(ctx_model_name).string(); } else { - std::filesystem::path cache_path = engine_cache_path; - if (cache_path.has_parent_path()) { - ctx_model_path = cache_path.parent_path().append(ctx_model_name).string(); - } else { - ctx_model_path = ctx_model_name; - } + ctx_model_path = ctx_model_name; } } return ctx_model_path; } /* - * Dump "EP context node" model + * Dump "EP context" model * */ -void DumpCtxNodeModel(ONNX_NAMESPACE::ModelProto* model_proto, +void DumpCtxModel(ONNX_NAMESPACE::ModelProto* model_proto, const std::string& ctx_model_path) { std::fstream dump(ctx_model_path, std::ios::out | std::ios::trunc | std::ios::binary); model_proto->SerializeToOstream(dump); LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Dumped " + ctx_model_path; } +bool IsAbsolutePath(std::string& path_string) { +#ifdef _WIN32 + onnxruntime::PathString ort_path_string = onnxruntime::ToPathString(path_string); + auto path = std::filesystem::path(ort_path_string.c_str()); + return 
path.is_absolute(); +#else + if (!path_string.empty() && path_string[0] == '/') { + return true; + } + return false; +#endif +} + +// Like "../file_path" +bool IsRelativePathToParentPath(std::string& path_string) { +#ifdef _WIN32 + onnxruntime::PathString ort_path_string = onnxruntime::ToPathString(path_string); + auto path = std::filesystem::path(ort_path_string.c_str()); + auto relative_path = path.lexically_normal().make_preferred().wstring(); + if (relative_path.find(L"..", 0) != std::string::npos) { + return true; + } + return false; +#else + if (!path_string.empty() && path_string.find("..", 0) != std::string::npos) { + return true; + } + return false; +#endif +} + Status TensorRTCacheModelHandler::GetEpContextFromGraph(const GraphViewer& graph_viewer) { if (!ValidateEPCtxNode(graph_viewer)) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "It's not a valid EP Context node"); @@ -229,8 +244,8 @@ Status TensorRTCacheModelHandler::GetEpContextFromGraph(const GraphViewer& graph auto& attrs = node->GetAttributes(); const int64_t embed_mode = attrs.at(EMBED_MODE).i(); - if (embed_mode) { - // Get engine from byte stream + if (embed_mode) { + // Get engine from byte stream. const std::string& context_binary = attrs.at(EP_CACHE_CONTEXT).s(); *(trt_engine_) = std::unique_ptr(trt_runtime_->deserializeCudaEngine(const_cast(context_binary.c_str()), static_cast(context_binary.length()))); @@ -239,20 +254,37 @@ Status TensorRTCacheModelHandler::GetEpContextFromGraph(const GraphViewer& graph return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "TensorRT EP could not deserialize engine from binary data"); } - } else { - // Get engine from cache file - std::ifstream engine_file(engine_cache_path_.string(), std::ios::binary | std::ios::in); + } else { + // Get engine from cache file. + std::string cache_path = attrs.at(EP_CACHE_CONTEXT).s(); + + // For security purpose, in the case of running context model, TRT EP won't allow + // engine cache path to be the relative path like "../file_path" or the absolute path. + // It only allows the engine cache to be in the same directory or sub directory of the context model. + if (IsAbsolutePath(cache_path)) { + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "For security purpose, the ep_cache_context attribute should be set with a relative path, but it is an absolute path: " + cache_path); + } + if (IsRelativePathToParentPath(cache_path)) { + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "The file path in ep_cache_context attribute has '..'. 
For security purpose, it's not allowed to point outside the directory."); + } + + // The engine cache and context model (current model) should be in the same directory + std::filesystem::path ctx_model_dir(GetPathOrParentPathOfCtxModel(ep_context_model_path_)); + auto engine_cache_path = ctx_model_dir.append(cache_path); + + std::ifstream engine_file(engine_cache_path.string(), std::ios::binary | std::ios::in); engine_file.seekg(0, std::ios::end); size_t engine_size = engine_file.tellg(); engine_file.seekg(0, std::ios::beg); std::unique_ptr engine_buf{new char[engine_size]}; engine_file.read((char*)engine_buf.get(), engine_size); *(trt_engine_) = std::unique_ptr(trt_runtime_->deserializeCudaEngine(engine_buf.get(), engine_size)); - LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] DeSerialized " + engine_cache_path_.string(); if (!(*trt_engine_)) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, - "TensorRT EP could not deserialize engine from cache: " + engine_cache_path_.string()); + "TensorRT EP could not deserialize engine from cache: " + engine_cache_path.string() + + ". Please make sure engine cache is inside the directory of trt_ep_context_file_path."); } + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] DeSerialized " + engine_cache_path.string(); } return Status::OK(); } @@ -277,27 +309,15 @@ bool TensorRTCacheModelHandler::ValidateEPCtxNode(const GraphViewer& graph_viewe } // "embed_mode" attr and "ep_cache_context" attr should be present - if (attrs.count(EMBED_MODE) > 0 && attrs.count(EP_CACHE_CONTEXT) > 0) { - // ep_cache_context: payload of the execution provider context if embed_mode=1, or path to the context file if embed_mode=0 - const int64_t embed_mode = attrs.at(EMBED_MODE).i(); - - // engine cache path - if (embed_mode == 0) { - // First assume engine cache path is relatvie to model path, - // If not, then assume the engine cache path is an absolute path. 
- engine_cache_path_ = LocateEngineRelativeToPath(attrs.at(EP_CACHE_CONTEXT).s(), GetModelPath(graph_viewer)); - auto default_engine_cache_path_ = engine_cache_path_; - if (!std::filesystem::exists(engine_cache_path_)) { - engine_cache_path_.assign(attrs.at(EP_CACHE_CONTEXT).s()); - if (!std::filesystem::exists(engine_cache_path_)) { - LOGS_DEFAULT(ERROR) << "Can't find " << default_engine_cache_path_.string() << " or " << engine_cache_path_.string() << " TensorRT engine"; - return false; - } - } - } else if (embed_mode == 1) { - LOGS_DEFAULT(WARNING) << EPCONTEXT_WARNING; - } + assert(attrs.count(EMBED_MODE) > 0); + assert(attrs.count(EP_CACHE_CONTEXT) > 0); + + const int64_t embed_mode = attrs.at(EMBED_MODE).i(); + if (embed_mode == 1) { + // engine binary data + LOGS_DEFAULT(WARNING) << EPCONTEXT_WARNING; } + return true; } } // namespace onnxruntime diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h index d4f53a1d532c1..8ff686c859d03 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h @@ -23,18 +23,19 @@ static const std::string EPCONTEXT_WARNING = bool GraphHasCtxNode(const GraphViewer& graph_viewer); const onnxruntime::Path& GetModelPath(const GraphViewer& graph_viewer); -std::filesystem::path LocateEngineRelativeToPath(std::string engine_cache_path, const onnxruntime::Path& path); -ONNX_NAMESPACE::ModelProto* CreateCtxNodeModel(const GraphViewer& graph_viewer, +std::filesystem::path GetPathOrParentPathOfCtxModel(const std::string& ep_context_file_path); +ONNX_NAMESPACE::ModelProto* CreateCtxModel(const GraphViewer& graph_viewer, const std::string engine_cache_path, char* engine_data, size_t size, const int64_t embed_mode, std::string compute_capability, const logging::Logger* logger); -std::string GetCtxNodeModelPath(const std::string& ep_context_file_path, - const std::string& engine_cache_path, - const std::string& original_model_path); -void DumpCtxNodeModel(ONNX_NAMESPACE::ModelProto* model_proto, +std::string GetCtxModelPath(const std::string& ep_context_file_path, + const std::string& original_model_path); +bool IsAbsolutePath(std::string& path_string); +bool IsRelativePathToParentPath(std::string& path_string); +void DumpCtxModel(ONNX_NAMESPACE::ModelProto* model_proto, const std::string& ctx_model_path); void UpdateCtxNodeModelEngineContext(ONNX_NAMESPACE::ModelProto* model_proto, char* engine_data, @@ -44,7 +45,8 @@ class TensorRTCacheModelHandler { public: TensorRTCacheModelHandler(std::unique_ptr* trt_engine, nvinfer1::IRuntime* trt_runtime, - std::string compute_capability) : trt_engine_(trt_engine), trt_runtime_(trt_runtime), compute_capability_(compute_capability) { + std::string ep_context_model_path, + std::string compute_capability) : trt_engine_(trt_engine), trt_runtime_(trt_runtime), ep_context_model_path_(ep_context_model_path), compute_capability_(compute_capability) { } ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(TensorRTCacheModelHandler); @@ -55,7 +57,7 @@ class TensorRTCacheModelHandler { private: std::unique_ptr* trt_engine_; nvinfer1::IRuntime* trt_runtime_; - std::filesystem::path engine_cache_path_; + std::string ep_context_model_path_; // If using context model, it implies context model and engine cache is in the same directory std::string compute_capability_; }; // TRTCacheModelHandler } // namespace onnxruntime diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc 
b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index a6b97a0c21d07..27df32c9a17fd 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -1348,6 +1348,9 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv timing_cache_enable_ = info.timing_cache_enable; force_timing_cache_match_ = info.force_timing_cache; detailed_build_log_ = info.detailed_build_log; + dump_ep_context_model_ = info.dump_ep_context_model; + ep_context_file_path_ = info.ep_context_file_path; + ep_context_embed_mode_ = info.ep_context_embed_mode; if (engine_cache_enable_ || int8_enable_ || timing_cache_enable_) { cache_path_ = info.engine_cache_path; cache_prefix_ = info.engine_cache_prefix; @@ -1378,9 +1381,6 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv profile_max_shapes = info.profile_max_shapes; profile_opt_shapes = info.profile_opt_shapes; cuda_graph_enable_ = info.cuda_graph_enable; - dump_ep_context_model_ = info.dump_ep_context_model; - ep_context_file_path_ = info.ep_context_file_path; - ep_context_embed_mode_ = info.ep_context_embed_mode; } else { try { const std::string max_partition_iterations_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kMaxPartitionIterations); @@ -1458,6 +1458,21 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv if (!timing_force_match_env.empty()) { force_timing_cache_match_ = (std::stoi(timing_force_match_env) == 0 ? false : true); } + + const std::string dump_ep_context_model_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kDumpEpContextModel); + if (!dump_ep_context_model_env.empty()) { + dump_ep_context_model_ = (std::stoi(dump_ep_context_model_env) == 0 ? false : true); + } + + const std::string ep_context_file_path_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kEpContextComputeCapabilityEnable); + if (!ep_context_file_path_env.empty()) { + ep_context_file_path_ = ep_context_file_path_env; + } + + const std::string ep_context_embed_mode_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kEpContextEmbedMode); + if (!ep_context_embed_mode_env.empty()) { + ep_context_embed_mode_ = std::stoi(ep_context_embed_mode_env); + } if (engine_cache_enable_ || int8_enable_ || timing_cache_enable_) { const std::string engine_cache_path = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kEngineCachePath); @@ -1536,21 +1551,6 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv cuda_graph_enable_ = (std::stoi(cuda_graph_enable_env) == 0 ? false : true); } - const std::string dump_ep_context_model_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kDumpEpContextModel); - if (!dump_ep_context_model_env.empty()) { - dump_ep_context_model_ = (std::stoi(dump_ep_context_model_env) == 0 ? 
false : true); - } - - const std::string ep_context_file_path_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kEpContextComputeCapabilityEnable); - if (!ep_context_file_path_env.empty()) { - ep_context_file_path_ = ep_context_file_path_env; - } - - const std::string ep_context_embed_mode_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kEpContextEmbedMode); - if (!ep_context_embed_mode_env.empty()) { - ep_context_embed_mode_ = std::stoi(ep_context_embed_mode_env); - } - } catch (const std::invalid_argument& ex) { LOGS_DEFAULT(WARNING) << "[TensorRT EP] Invalid Argument (from environment variables): " << ex.what(); } catch (const std::out_of_range& ex) { @@ -1655,6 +1655,28 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv } } + // If ep_context_file_path_ is provided as a directory, create it if it's not existed + if (!ep_context_file_path_.empty() && std::filesystem::path(ep_context_file_path_).extension().empty() && !std::filesystem::is_directory(ep_context_file_path_)) { + if (!std::filesystem::create_directory(ep_context_file_path_)) { + throw std::runtime_error("Failed to create directory " + ep_context_file_path_); + } + } + + // If dump_ep_context_model is enable, TRT EP forces cache_path_ to be the relative path of ep_context_file_path_. + // The cache path will be saved as the "ep_cache_context" node attritue of the EP context node. + // For security reason, it needs to make sure the engine cache is saved inside context model directory. + if (dump_ep_context_model_ && engine_cache_enable_) { + if (IsAbsolutePath(cache_path_)) { + LOGS_DEFAULT(ERROR) << "In the case of dumping context model and for security purpose, the trt_engine_cache_path should be set with a relative path, but it is an absolute path: " << cache_path_; + } + if (IsRelativePathToParentPath(cache_path_)) { + LOGS_DEFAULT(ERROR) << "In the case of dumping context model and for security purpose, The trt_engine_cache_path has '..', it's not allowed to point outside the directory."; + } + + // Make cache_path_ to be the relative path of ep_context_file_path_ + cache_path_ = GetPathOrParentPathOfCtxModel(ep_context_file_path_).append(cache_path_).string(); + } + { auto lock = GetApiLock(); runtime_ = std::unique_ptr(nvinfer1::createInferRuntime(GetTensorrtLogger())); @@ -2852,7 +2874,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView // Generate file name for dumping ep context model if (dump_ep_context_model_ && ctx_model_path_.empty()) { - ctx_model_path_ = GetCtxNodeModelPath(ep_context_file_path_, engine_cache_path, model_path_); + ctx_model_path_ = GetCtxModelPath(ep_context_file_path_, model_path_); } if (!has_dynamic_shape) { @@ -2991,14 +3013,14 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView } // dump EP context node model if (dump_ep_context_model_) { - std::unique_ptr model_proto{CreateCtxNodeModel(graph_body_viewer, + std::unique_ptr model_proto{CreateCtxModel(graph_body_viewer, engine_cache_path, reinterpret_cast(serialized_engine->data()), serialized_engine->size(), ep_context_embed_mode_, compute_capability_, GetLogger())}; - DumpCtxNodeModel(model_proto.get(), ctx_model_path_); + DumpCtxModel(model_proto.get(), ctx_model_path_); } } } @@ -3058,7 +3080,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView // TRT EP will serialize the model at inference time due to engine can be updated and the updated engine should be included in the model. 
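// [Editorial note, not part of the patch] For reference, the embed-mode-1 flow this comment describes is
// visible further down in this same patch: at inference time, once a (possibly updated) engine has been
// serialized, the context model is refreshed and re-dumped, roughly:
//   UpdateCtxNodeModelEngineContext(model_proto_.get(), engine_data, engine_size);
//   DumpCtxModel(model_proto_.get(), ctx_model_path_);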
// However, if the embed_mode is 0 (only includes engine path), TRT EP will serialize it here. if (dump_ep_context_model_ && has_dynamic_shape) { - model_proto_.reset(CreateCtxNodeModel(graph_body_viewer, + model_proto_.reset(CreateCtxModel(graph_body_viewer, engine_cache_path, nullptr, 0, @@ -3066,7 +3088,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView compute_capability_, GetLogger())); if (ep_context_embed_mode_ == 0) { - DumpCtxNodeModel(model_proto_.get(), ctx_model_path_); + DumpCtxModel(model_proto_.get(), ctx_model_path_); } } @@ -3387,7 +3409,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView // dump ep context model if (dump_ep_context_model_ && ep_context_embed_mode_) { UpdateCtxNodeModelEngineContext(model_proto_.get(), reinterpret_cast(serialized_engine->data()), serialized_engine->size()); - DumpCtxNodeModel(model_proto_.get(), ctx_model_path_); + DumpCtxModel(model_proto_.get(), ctx_model_path_); } context_update = true; } @@ -3580,7 +3602,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromPrecompiledEngine(con std::unordered_map output_types; // TRT engine output name -> ORT output tensor type // Get engine binary data and deserialize it - auto trt_cache_model_handler = TensorRTCacheModelHandler(&trt_engine, runtime_.get(), compute_capability_); + auto trt_cache_model_handler = TensorRTCacheModelHandler(&trt_engine, runtime_.get(), model_path_, compute_capability_); auto status = trt_cache_model_handler.GetEpContextFromGraph(graph_body_viewer); if (status != Status::OK()) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, status.ErrorMessage()); From cbfee7f49a38c20998c7c72cbab14a6d315d95f4 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 19 Jan 2024 07:01:01 +0000 Subject: [PATCH 17/25] lintrunner -a --- .../tensorrt/onnx_ctx_model_helper.cc | 22 ++++++++-------- .../tensorrt/onnx_ctx_model_helper.h | 16 ++++++------ .../tensorrt/tensorrt_execution_provider.cc | 26 +++++++++---------- .../core/session/provider_bridge_ort.cc | 2 -- 4 files changed, 32 insertions(+), 34 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc index 1c64df344e475..47bcdb58a8a72 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc @@ -63,12 +63,12 @@ void UpdateCtxNodeModelEngineContext(ONNX_NAMESPACE::ModelProto* model_proto, * Create "EP context node" model where engine information is embedded */ ONNX_NAMESPACE::ModelProto* CreateCtxModel(const GraphViewer& graph_viewer, - const std::string engine_cache_path, - char* engine_data, - size_t size, - const int64_t embed_mode, - std::string compute_capability, - const logging::Logger* logger) { + const std::string engine_cache_path, + char* engine_data, + size_t size, + const int64_t embed_mode, + std::string compute_capability, + const logging::Logger* logger) { auto model_build = graph_viewer.CreateModel(*logger); auto& graph_build = model_build->MainGraph(); @@ -199,7 +199,7 @@ std::string GetCtxModelPath(const std::string& ep_context_file_path, * */ void DumpCtxModel(ONNX_NAMESPACE::ModelProto* model_proto, - const std::string& ctx_model_path) { + const std::string& ctx_model_path) { std::fstream dump(ctx_model_path, std::ios::out | std::ios::trunc | std::ios::binary); model_proto->SerializeToOstream(dump); LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Dumped " + ctx_model_path; @@ -244,7 
+244,7 @@ Status TensorRTCacheModelHandler::GetEpContextFromGraph(const GraphViewer& graph auto& attrs = node->GetAttributes(); const int64_t embed_mode = attrs.at(EMBED_MODE).i(); - if (embed_mode) { + if (embed_mode) { // Get engine from byte stream. const std::string& context_binary = attrs.at(EP_CACHE_CONTEXT).s(); *(trt_engine_) = std::unique_ptr(trt_runtime_->deserializeCudaEngine(const_cast(context_binary.c_str()), @@ -254,7 +254,7 @@ Status TensorRTCacheModelHandler::GetEpContextFromGraph(const GraphViewer& graph return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "TensorRT EP could not deserialize engine from binary data"); } - } else { + } else { // Get engine from cache file. std::string cache_path = attrs.at(EP_CACHE_CONTEXT).s(); @@ -281,8 +281,8 @@ Status TensorRTCacheModelHandler::GetEpContextFromGraph(const GraphViewer& graph *(trt_engine_) = std::unique_ptr(trt_runtime_->deserializeCudaEngine(engine_buf.get(), engine_size)); if (!(*trt_engine_)) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, - "TensorRT EP could not deserialize engine from cache: " + engine_cache_path.string() + - ". Please make sure engine cache is inside the directory of trt_ep_context_file_path."); + "TensorRT EP could not deserialize engine from cache: " + engine_cache_path.string() + + ". Please make sure engine cache is inside the directory of trt_ep_context_file_path."); } LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] DeSerialized " + engine_cache_path.string(); } diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h index 8ff686c859d03..bf3bf9e3495d7 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h @@ -25,18 +25,18 @@ bool GraphHasCtxNode(const GraphViewer& graph_viewer); const onnxruntime::Path& GetModelPath(const GraphViewer& graph_viewer); std::filesystem::path GetPathOrParentPathOfCtxModel(const std::string& ep_context_file_path); ONNX_NAMESPACE::ModelProto* CreateCtxModel(const GraphViewer& graph_viewer, - const std::string engine_cache_path, - char* engine_data, - size_t size, - const int64_t embed_mode, - std::string compute_capability, - const logging::Logger* logger); + const std::string engine_cache_path, + char* engine_data, + size_t size, + const int64_t embed_mode, + std::string compute_capability, + const logging::Logger* logger); std::string GetCtxModelPath(const std::string& ep_context_file_path, const std::string& original_model_path); bool IsAbsolutePath(std::string& path_string); bool IsRelativePathToParentPath(std::string& path_string); void DumpCtxModel(ONNX_NAMESPACE::ModelProto* model_proto, - const std::string& ctx_model_path); + const std::string& ctx_model_path); void UpdateCtxNodeModelEngineContext(ONNX_NAMESPACE::ModelProto* model_proto, char* engine_data, size_t size); @@ -57,7 +57,7 @@ class TensorRTCacheModelHandler { private: std::unique_ptr* trt_engine_; nvinfer1::IRuntime* trt_runtime_; - std::string ep_context_model_path_; // If using context model, it implies context model and engine cache is in the same directory + std::string ep_context_model_path_; // If using context model, it implies context model and engine cache is in the same directory std::string compute_capability_; }; // TRTCacheModelHandler } // namespace onnxruntime diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index 
27df32c9a17fd..8150fd19baa11 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -1458,7 +1458,7 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv if (!timing_force_match_env.empty()) { force_timing_cache_match_ = (std::stoi(timing_force_match_env) == 0 ? false : true); } - + const std::string dump_ep_context_model_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kDumpEpContextModel); if (!dump_ep_context_model_env.empty()) { dump_ep_context_model_ = (std::stoi(dump_ep_context_model_env) == 0 ? false : true); @@ -3014,12 +3014,12 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView // dump EP context node model if (dump_ep_context_model_) { std::unique_ptr model_proto{CreateCtxModel(graph_body_viewer, - engine_cache_path, - reinterpret_cast(serialized_engine->data()), - serialized_engine->size(), - ep_context_embed_mode_, - compute_capability_, - GetLogger())}; + engine_cache_path, + reinterpret_cast(serialized_engine->data()), + serialized_engine->size(), + ep_context_embed_mode_, + compute_capability_, + GetLogger())}; DumpCtxModel(model_proto.get(), ctx_model_path_); } } @@ -3081,12 +3081,12 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView // However, if the embed_mode is 0 (only includes engine path), TRT EP will serialize it here. if (dump_ep_context_model_ && has_dynamic_shape) { model_proto_.reset(CreateCtxModel(graph_body_viewer, - engine_cache_path, - nullptr, - 0, - ep_context_embed_mode_, - compute_capability_, - GetLogger())); + engine_cache_path, + nullptr, + 0, + ep_context_embed_mode_, + compute_capability_, + GetLogger())); if (ep_context_embed_mode_ == 0) { DumpCtxModel(model_proto_.get(), ctx_model_path_); } diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 1b3b7de7d89c5..3269c9f0f4e4b 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1759,8 +1759,6 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT, _In factory = onnxruntime::TensorrtProviderFactoryCreator::Create(tensorrt_options); #endif - - if (!factory) { return OrtApis::CreateStatus(ORT_FAIL, "SessionOptionsAppendExecutionProvider_Tensorrt: Failed to load shared library"); } From d0e7a488fde4ef9f16d076eefe37d8b95f672d78 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 19 Jan 2024 17:02:15 +0000 Subject: [PATCH 18/25] update --- .../tensorrt/onnx_ctx_model_helper.cc | 6 +-- .../tensorrt/tensorrt_execution_provider.cc | 47 ++++++++++--------- .../providers/tensorrt/tensorrt_basic_test.cc | 27 ++++++----- 3 files changed, 44 insertions(+), 36 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc index 47bcdb58a8a72..0f659c91ed800 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc @@ -158,7 +158,7 @@ std::filesystem::path GetPathOrParentPathOfCtxModel(const std::string& ep_contex * to the same directory provided by ep_context_file_path_. (i.e. 
engine_cache_path_ = ep_context_file_path_) * * Example 1: - * ep_context_file_path = "/home/user/ep_context_model_foler" + * ep_context_file_path = "/home/user/ep_context_model_directory" * original_model_path = "model.onnx" * => return "/home/user/ep_context_model_folder/model_ctx.onnx" * @@ -168,9 +168,9 @@ std::filesystem::path GetPathOrParentPathOfCtxModel(const std::string& ep_contex * => return "my_ctx_model.onnx" * * Example 3: - * ep_context_file_path = "/home/user2/ep_context_model_foler/my_ctx_model.onnx" + * ep_context_file_path = "/home/user2/ep_context_model_directory/my_ctx_model.onnx" * original_model_path = "model.onnx" - * => return "/home/user2/ep_context_model_foler/my_ctx_model.onnx" + * => return "/home/user2/ep_context_model_directory/my_ctx_model.onnx" * */ std::string GetCtxModelPath(const std::string& ep_context_file_path, diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index 8150fd19baa11..b8be0c0b0f766 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -1578,6 +1578,31 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv dla_core_ = 0; } + // If dump_ep_context_model_ is enable, TRT EP forces cache_path_ to be the relative path of ep_context_file_path_. + // For example, + // - original cache path = "engine_cache_dir" -> new cache path = "./context_model_dir/engine_cache_dir" + // - original cache path = "" -> new cache path = "./context_model_dir" + // The new cache path will be saved as the "ep_cache_context" node attritue of the EP context node. + // For security reason, it needs to make sure the engine cache is saved inside context model directory. 
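// [Editorial worked example, not part of the patch] With the helpers added in onnx_ctx_model_helper.cc,
// the checks below behave roughly like this (Linux-style paths, values hypothetical):
//   IsAbsolutePath("/opt/trt_caches")            -> true   (rejected: absolute path)
//   IsRelativePathToParentPath("../trt_caches")  -> true   (rejected: escapes the context model directory)
//   cache_path_ = "engine_dir", ep_context_file_path_ = "ctx_dir"
//     -> cache_path_ becomes "ctx_dir/engine_dir", i.e. the engine cache stays inside the context model directory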
+ if (dump_ep_context_model_ && engine_cache_enable_) { + if (IsAbsolutePath(cache_path_)) { + LOGS_DEFAULT(ERROR) << "In the case of dumping context model and for security purpose, the trt_engine_cache_path should be set with a relative path, but it is an absolute path: " << cache_path_; + } + if (IsRelativePathToParentPath(cache_path_)) { + LOGS_DEFAULT(ERROR) << "In the case of dumping context model and for security purpose, The trt_engine_cache_path has '..', it's not allowed to point outside the directory."; + } + + // Make cache_path_ to be the relative path of ep_context_file_path_ + cache_path_ = GetPathOrParentPathOfCtxModel(ep_context_file_path_).append(cache_path_).string(); + } + + // If ep_context_file_path_ is provided as a directory, create it if it's not existed + if (!ep_context_file_path_.empty() && std::filesystem::path(ep_context_file_path_).extension().empty() && !std::filesystem::is_directory(ep_context_file_path_)) { + if (!std::filesystem::create_directory(ep_context_file_path_)) { + throw std::runtime_error("Failed to create directory " + ep_context_file_path_); + } + } + if (engine_cache_enable_ || int8_enable_ || timing_cache_enable_) { if (!cache_path_.empty() && !fs::is_directory(cache_path_)) { if (!fs::create_directory(cache_path_)) { @@ -1655,28 +1680,6 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv } } - // If ep_context_file_path_ is provided as a directory, create it if it's not existed - if (!ep_context_file_path_.empty() && std::filesystem::path(ep_context_file_path_).extension().empty() && !std::filesystem::is_directory(ep_context_file_path_)) { - if (!std::filesystem::create_directory(ep_context_file_path_)) { - throw std::runtime_error("Failed to create directory " + ep_context_file_path_); - } - } - - // If dump_ep_context_model is enable, TRT EP forces cache_path_ to be the relative path of ep_context_file_path_. - // The cache path will be saved as the "ep_cache_context" node attritue of the EP context node. - // For security reason, it needs to make sure the engine cache is saved inside context model directory. 
- if (dump_ep_context_model_ && engine_cache_enable_) { - if (IsAbsolutePath(cache_path_)) { - LOGS_DEFAULT(ERROR) << "In the case of dumping context model and for security purpose, the trt_engine_cache_path should be set with a relative path, but it is an absolute path: " << cache_path_; - } - if (IsRelativePathToParentPath(cache_path_)) { - LOGS_DEFAULT(ERROR) << "In the case of dumping context model and for security purpose, The trt_engine_cache_path has '..', it's not allowed to point outside the directory."; - } - - // Make cache_path_ to be the relative path of ep_context_file_path_ - cache_path_ = GetPathOrParentPathOfCtxModel(ep_context_file_path_).append(cache_path_).string(); - } - { auto lock = GetApiLock(); runtime_ = std::unique_ptr(nvinfer1::createInferRuntime(GetTensorrtLogger())); diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index a1d19ecbabdcf..225c920326470 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -394,7 +394,7 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) { std::vector expected_dims_mul_m = {1, 3, 2}; std::vector expected_values_mul_m = {3.0f, 6.0f, 9.0f, 12.0f, 15.0f, 18.0f}; - // Test dumping EP context model to provided path + // Dump context model with specific name OrtTensorRTProviderOptionsV2 params; params.trt_engine_cache_enable = 1; params.trt_dump_ep_context_model = 1; @@ -405,29 +405,34 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) { ASSERT_TRUE(status.IsOK()); status = session_object.Initialize(); ASSERT_TRUE(status.IsOK()); - // "EP_Context_model.onnx" should be created - ASSERT_TRUE(HasCacheFileWithPrefix(params.trt_ep_context_file_path)); + ASSERT_TRUE(HasCacheFileWithPrefix(params.trt_ep_context_file_path)); // "EP_Context_model.onnx" should be created - // Test dumping EP context model to provided path + // Dump context model to specific path InferenceSession session_object2{so, GetEnvironment()}; OrtTensorRTProviderOptionsV2 params2; params2.trt_engine_cache_enable = 1; params2.trt_dump_ep_context_model = 1; - params2.trt_engine_cache_path = "./trt_engine_cache"; - params2.trt_ep_context_file_path = "EP_Context_model.onnx"; + params2.trt_engine_cache_prefix = "TRT_engine_cache"; + params2.trt_engine_cache_path = "engine_cache_folder"; // due to dump_ep_context_model = 1, the new cache path is ./context_model_folder/engine_cache_folder + params2.trt_ep_context_file_path = "./context_model_folder"; execution_provider = TensorrtExecutionProviderWithOptions(¶ms2); EXPECT_TRUE(session_object2.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); status = session_object2.Load(model_name); ASSERT_TRUE(status.IsOK()); status = session_object2.Initialize(); ASSERT_TRUE(status.IsOK()); - // "./trt_engine_cache/EP_Context_model.onnx" should be created - ASSERT_TRUE(HasCacheFileWithPrefix(params2.trt_ep_context_file_path, params2.trt_engine_cache_path)); - - // Test EP context model inference + auto new_engine_cache_path = std::filesystem::path(params2.trt_ep_context_file_path).append(params2.trt_engine_cache_path).string(); + // Test engine cache path: + // "./context_model_folder/engine_cache_folder/TRT_engine_cache...engine" should be created + ASSERT_TRUE(HasCacheFileWithPrefix(params2.trt_engine_cache_prefix, new_engine_cache_path)); + // Test context model path: + // "./context_model_folder/EPContextNode_test_ctx.onnx" should be created + 
ASSERT_TRUE(HasCacheFileWithPrefix("EPContextNode_test_ctx.onnx", params2.trt_ep_context_file_path)); + + // Context model inference InferenceSession session_object3{so, GetEnvironment()}; OrtTensorRTProviderOptionsV2 params3; - model_name = "EP_Context_model.onnx"; + model_name = params.trt_ep_context_file_path; execution_provider = TensorrtExecutionProviderWithOptions(¶ms3); EXPECT_TRUE(session_object3.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); status = session_object3.Load(model_name); From 14748c052e9d4bfca7b35cd5165193948eec3602 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 19 Jan 2024 17:22:50 +0000 Subject: [PATCH 19/25] add comment --- .../tensorrt/tensorrt_provider_options.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h index 1d9af3f18d184..dc782fd54f1c1 100644 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h @@ -48,8 +48,26 @@ struct OrtTensorRTProviderOptionsV2 { const char* trt_profile_max_shapes{nullptr}; // Specify the range of the input shapes to build the engine with const char* trt_profile_opt_shapes{nullptr}; // Specify the range of the input shapes to build the engine with int trt_cuda_graph_enable{0}; // Enable CUDA graph in ORT TRT + + /* + * Please note that there are rules for using following context model related provider options: + * + * 1. In the case of dumping the context model and loading the context model, + * for security reason, TRT EP doesn't allow the "ep_cache_context" node attribute of EP context node to be + * the absolute path or relative path that is outside of context model directory. + * It means engine cache needs to be in the same directory or sub-directory of context model. + * + * 2. In the case of dumping the context model, the engine cache path will be changed to the relative path of context model directory. + * For example: + * If "trt_dump_ep_context_model" is enabled and "trt_engine_cache_enable" is enabled, + * if "trt_ep_context_file_path" is "./context_model_dir", + * - if "trt_engine_cache_path" is "" -> the engine cache will be saved to "./context_model_dir" + * - if "trt_engine_cache_path" is "engine_dir" -> the engine cache will be saved to "./context_model_dir/engine_dir" + * + */ int trt_dump_ep_context_model{0}; // Dump EP context node model const char* trt_ep_context_file_path{nullptr}; // Specify file name to dump EP context node model. int trt_ep_context_embed_mode{0}; // Specify EP context embed mode. 
Default 0 = context is engine cache path, 1 = context is engine binary data + const char* trt_engine_cache_prefix{nullptr}; // specify engine cache prefix }; From 34747accd0c251fe4c7242ae498771f7cab88c3e Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 19 Jan 2024 17:24:12 +0000 Subject: [PATCH 20/25] lintrunner -a --- .../tensorrt/tensorrt_provider_options.h | 24 +++++++++---------- .../tensorrt/tensorrt_execution_provider.cc | 2 +- .../providers/tensorrt/tensorrt_basic_test.cc | 6 ++--- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h index dc782fd54f1c1..0e0c184934582 100644 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h @@ -48,26 +48,26 @@ struct OrtTensorRTProviderOptionsV2 { const char* trt_profile_max_shapes{nullptr}; // Specify the range of the input shapes to build the engine with const char* trt_profile_opt_shapes{nullptr}; // Specify the range of the input shapes to build the engine with int trt_cuda_graph_enable{0}; // Enable CUDA graph in ORT TRT - + /* * Please note that there are rules for using following context model related provider options: - * - * 1. In the case of dumping the context model and loading the context model, + * + * 1. In the case of dumping the context model and loading the context model, * for security reason, TRT EP doesn't allow the "ep_cache_context" node attribute of EP context node to be * the absolute path or relative path that is outside of context model directory. * It means engine cache needs to be in the same directory or sub-directory of context model. - * - * 2. In the case of dumping the context model, the engine cache path will be changed to the relative path of context model directory. - * For example: + * + * 2. In the case of dumping the context model, the engine cache path will be changed to the relative path of context model directory. + * For example: * If "trt_dump_ep_context_model" is enabled and "trt_engine_cache_enable" is enabled, * if "trt_ep_context_file_path" is "./context_model_dir", * - if "trt_engine_cache_path" is "" -> the engine cache will be saved to "./context_model_dir" * - if "trt_engine_cache_path" is "engine_dir" -> the engine cache will be saved to "./context_model_dir/engine_dir" - * + * */ - int trt_dump_ep_context_model{0}; // Dump EP context node model - const char* trt_ep_context_file_path{nullptr}; // Specify file name to dump EP context node model. - int trt_ep_context_embed_mode{0}; // Specify EP context embed mode. Default 0 = context is engine cache path, 1 = context is engine binary data - - const char* trt_engine_cache_prefix{nullptr}; // specify engine cache prefix + int trt_dump_ep_context_model{0}; // Dump EP context node model + const char* trt_ep_context_file_path{nullptr}; // Specify file name to dump EP context node model. + int trt_ep_context_embed_mode{0}; // Specify EP context embed mode. 
Default 0 = context is engine cache path, 1 = context is engine binary data + + const char* trt_engine_cache_prefix{nullptr}; // specify engine cache prefix }; diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index b8be0c0b0f766..ec36660a7e6d6 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -1579,7 +1579,7 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv } // If dump_ep_context_model_ is enable, TRT EP forces cache_path_ to be the relative path of ep_context_file_path_. - // For example, + // For example, // - original cache path = "engine_cache_dir" -> new cache path = "./context_model_dir/engine_cache_dir" // - original cache path = "" -> new cache path = "./context_model_dir" // The new cache path will be saved as the "ep_cache_context" node attritue of the EP context node. diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index 225c920326470..048a4de1685cd 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -405,7 +405,7 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) { ASSERT_TRUE(status.IsOK()); status = session_object.Initialize(); ASSERT_TRUE(status.IsOK()); - ASSERT_TRUE(HasCacheFileWithPrefix(params.trt_ep_context_file_path)); // "EP_Context_model.onnx" should be created + ASSERT_TRUE(HasCacheFileWithPrefix(params.trt_ep_context_file_path)); // "EP_Context_model.onnx" should be created // Dump context model to specific path InferenceSession session_object2{so, GetEnvironment()}; @@ -413,7 +413,7 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) { params2.trt_engine_cache_enable = 1; params2.trt_dump_ep_context_model = 1; params2.trt_engine_cache_prefix = "TRT_engine_cache"; - params2.trt_engine_cache_path = "engine_cache_folder"; // due to dump_ep_context_model = 1, the new cache path is ./context_model_folder/engine_cache_folder + params2.trt_engine_cache_path = "engine_cache_folder"; // due to dump_ep_context_model = 1, the new cache path is ./context_model_folder/engine_cache_folder params2.trt_ep_context_file_path = "./context_model_folder"; execution_provider = TensorrtExecutionProviderWithOptions(¶ms2); EXPECT_TRUE(session_object2.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); @@ -428,7 +428,7 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) { // Test context model path: // "./context_model_folder/EPContextNode_test_ctx.onnx" should be created ASSERT_TRUE(HasCacheFileWithPrefix("EPContextNode_test_ctx.onnx", params2.trt_ep_context_file_path)); - + // Context model inference InferenceSession session_object3{so, GetEnvironment()}; OrtTensorRTProviderOptionsV2 params3; From 22045cc4892453da125c75c4d83290a8e844a94c Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 19 Jan 2024 22:38:24 +0000 Subject: [PATCH 21/25] add unit test --- .../tensorrt/tensorrt_provider_options.h | 2 +- .../tensorrt/onnx_ctx_model_helper.cc | 9 ++- .../tensorrt/tensorrt_execution_provider.cc | 44 +++++++----- .../tensorrt/tensorrt_execution_provider.h | 3 +- .../providers/tensorrt/tensorrt_basic_test.cc | 67 +++++++++++++++++-- 5 files changed, 99 insertions(+), 26 deletions(-) diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h 
b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h index 0e0c184934582..32a9f06464ace 100644 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h @@ -66,7 +66,7 @@ struct OrtTensorRTProviderOptionsV2 { * */ int trt_dump_ep_context_model{0}; // Dump EP context node model - const char* trt_ep_context_file_path{nullptr}; // Specify file name to dump EP context node model. + const char* trt_ep_context_file_path{nullptr}; // Specify file name to dump EP context node model. Can be a path or a file name or a file name with path. int trt_ep_context_embed_mode{0}; // Specify EP context embed mode. Default 0 = context is engine cache path, 1 = context is engine binary data const char* trt_engine_cache_prefix{nullptr}; // specify engine cache prefix diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc index 0f659c91ed800..1994d1f5ab0b8 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc @@ -272,6 +272,12 @@ Status TensorRTCacheModelHandler::GetEpContextFromGraph(const GraphViewer& graph std::filesystem::path ctx_model_dir(GetPathOrParentPathOfCtxModel(ep_context_model_path_)); auto engine_cache_path = ctx_model_dir.append(cache_path); + if (!std::filesystem::exists(engine_cache_path)) { + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, + "TensorRT EP can't find engine cache: " + engine_cache_path.string() + + ". Please make sure engine cache is in the same directory or sub-directory of context model."); + } + std::ifstream engine_file(engine_cache_path.string(), std::ios::binary | std::ios::in); engine_file.seekg(0, std::ios::end); size_t engine_size = engine_file.tellg(); @@ -281,8 +287,7 @@ Status TensorRTCacheModelHandler::GetEpContextFromGraph(const GraphViewer& graph *(trt_engine_) = std::unique_ptr(trt_runtime_->deserializeCudaEngine(engine_buf.get(), engine_size)); if (!(*trt_engine_)) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, - "TensorRT EP could not deserialize engine from cache: " + engine_cache_path.string() + - ". Please make sure engine cache is inside the directory of trt_ep_context_file_path."); + "TensorRT EP could not deserialize engine from cache: " + engine_cache_path.string()); } LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] DeSerialized " + engine_cache_path.string(); } diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index ec36660a7e6d6..23417e668f34a 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -1578,6 +1578,13 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv dla_core_ = 0; } + // If ep_context_file_path_ is provided as a directory, create it if it's not existed + if (dump_ep_context_model_ && !ep_context_file_path_.empty() && std::filesystem::path(ep_context_file_path_).extension().empty() && !std::filesystem::is_directory(ep_context_file_path_)) { + if (!std::filesystem::create_directory(ep_context_file_path_)) { + throw std::runtime_error("Failed to create directory " + ep_context_file_path_); + } + } + // If dump_ep_context_model_ is enable, TRT EP forces cache_path_ to be the relative path of ep_context_file_path_. 
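// [Editorial note, not part of the patch] "Relative" here means cache_path_ is re-rooted under the context
// model directory a few lines below, roughly:
//   cache_path_ = GetPathOrParentPathOfCtxModel(ep_context_file_path_).append(cache_path_).string();
// so the engine cache cannot end up outside the dumped context model's directory.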
// For example, // - original cache path = "engine_cache_dir" -> new cache path = "./context_model_dir/engine_cache_dir" @@ -1596,13 +1603,6 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv cache_path_ = GetPathOrParentPathOfCtxModel(ep_context_file_path_).append(cache_path_).string(); } - // If ep_context_file_path_ is provided as a directory, create it if it's not existed - if (!ep_context_file_path_.empty() && std::filesystem::path(ep_context_file_path_).extension().empty() && !std::filesystem::is_directory(ep_context_file_path_)) { - if (!std::filesystem::create_directory(ep_context_file_path_)) { - throw std::runtime_error("Failed to create directory " + ep_context_file_path_); - } - } - if (engine_cache_enable_ || int8_enable_ || timing_cache_enable_) { if (!cache_path_.empty() && !fs::is_directory(cache_path_)) { if (!fs::create_directory(cache_path_)) { @@ -2335,6 +2335,14 @@ TensorrtExecutionProvider::GetCapability(const GraphViewer& graph, // Construct subgraph capability from node list std::vector> result; + // Get ModelPath + const auto& path_string = graph.ModelPath().ToPathString(); +#ifdef _WIN32 + wcstombs_s(nullptr, model_path_, sizeof(model_path_), path_string.c_str(), sizeof(model_path_)); +#else + strcpy(model_path_, path_string.c_str()); +#endif + // If the model consists of only a single "EPContext" contrib op, it means TRT EP can fetch the precompiled engine info from the node and // load the engine directly without having to go through the processes of graph proto reconstruction, calling TRT parser and engine compilation. // So, simply return the ComputeCapability here. @@ -2345,14 +2353,6 @@ TensorrtExecutionProvider::GetCapability(const GraphViewer& graph, return result; } - // Get ModelPath - const auto& path_string = graph.ModelPath().ToPathString(); -#ifdef _WIN32 - wcstombs_s(nullptr, model_path_, sizeof(model_path_), path_string.c_str(), sizeof(model_path_)); -#else - strcpy(model_path_, path_string.c_str()); -#endif - // Generate unique kernel name for TRT graph HashValue model_hash = TRTGenerateId(graph); @@ -3016,8 +3016,14 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView } // dump EP context node model if (dump_ep_context_model_) { + + // "ep_cache_context" node attribute should be a relative path to context model directory + if (ep_cache_context_attr_.empty()) { + ep_cache_context_attr_ = std::filesystem::relative(engine_cache_path, ep_context_file_path_).string(); + } + std::unique_ptr model_proto{CreateCtxModel(graph_body_viewer, - engine_cache_path, + ep_cache_context_attr_, reinterpret_cast(serialized_engine->data()), serialized_engine->size(), ep_context_embed_mode_, @@ -3083,8 +3089,12 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView // TRT EP will serialize the model at inference time due to engine can be updated and the updated engine should be included in the model. // However, if the embed_mode is 0 (only includes engine path), TRT EP will serialize it here. 
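// [Editorial worked example, not part of the patch] std::filesystem::relative() used just below is what
// keeps the dumped "ep_cache_context" attribute relative to the context model directory, e.g. with
// hypothetical paths:
//   std::filesystem::relative("context_model_folder/engine_cache_folder/TRT_engine_cache_sm80.engine",
//                             "context_model_folder")
//   -> "engine_cache_folder/TRT_engine_cache_sm80.engine"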
if (dump_ep_context_model_ && has_dynamic_shape) { + // "ep_cache_context" node attribute should be a relative path to context model directory + if (ep_cache_context_attr_.empty()) { + ep_cache_context_attr_ = std::filesystem::relative(engine_cache_path, ep_context_file_path_).string(); + } model_proto_.reset(CreateCtxModel(graph_body_viewer, - engine_cache_path, + ep_cache_context_attr_, nullptr, 0, ep_context_embed_mode_, diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h index 86645fabd36d9..70b71aa221eef 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h @@ -293,7 +293,6 @@ class TensorrtExecutionProvider : public IExecutionProvider { bool force_timing_cache_match_ = false; bool detailed_build_log_ = false; bool cuda_graph_enable_ = false; - std::string ctx_model_path_; std::string cache_prefix_; // The OrtAllocator object will be get during ep compute time @@ -304,6 +303,8 @@ class TensorrtExecutionProvider : public IExecutionProvider { bool dump_ep_context_model_ = false; std::string ep_context_file_path_; int ep_context_embed_mode_ = 0; + std::string ctx_model_path_; + std::string ep_cache_context_attr_; std::unique_ptr model_proto_ = ONNX_NAMESPACE::ModelProto::Create(); std::unordered_set control_flow_op_set_ = {"If", "Loop", "Scan"}; diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index 048a4de1685cd..69834b934b0f8 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -394,7 +394,16 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) { std::vector expected_dims_mul_m = {1, 3, 2}; std::vector expected_values_mul_m = {3.0f, 6.0f, 9.0f, 12.0f, 15.0f, 18.0f}; - // Dump context model with specific name + /* + * Test case 1: Dump context model + * + * provider options=> + * trt_ep_context_file_path = "EP_Context_model.onnx" + * + * expected result => + * context model "EP_Context_model.onnx" should be created in current directory + * + */ OrtTensorRTProviderOptionsV2 params; params.trt_engine_cache_enable = 1; params.trt_dump_ep_context_model = 1; @@ -405,16 +414,27 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) { ASSERT_TRUE(status.IsOK()); status = session_object.Initialize(); ASSERT_TRUE(status.IsOK()); - ASSERT_TRUE(HasCacheFileWithPrefix(params.trt_ep_context_file_path)); // "EP_Context_model.onnx" should be created + ASSERT_TRUE(HasCacheFileWithPrefix(params.trt_ep_context_file_path)); - // Dump context model to specific path + /* + * Test case 2: Dump context model + * + * provider options=> + * trt_engine_cache_prefix = "TRT_engine_cache" + * trt_ep_context_file_path = "context_model_folder" + * trt_engine_cache_path = "engine_cache_folder" + * + * expected result => + * engine cache "./context_model_folder/engine_cache_folder/TRT_engine_cache...engine" should be created + * context model "./context_model_folder/EPContextNode_test_ctx.onnx" should be created + */ InferenceSession session_object2{so, GetEnvironment()}; OrtTensorRTProviderOptionsV2 params2; params2.trt_engine_cache_enable = 1; params2.trt_dump_ep_context_model = 1; params2.trt_engine_cache_prefix = "TRT_engine_cache"; params2.trt_engine_cache_path = "engine_cache_folder"; // due to dump_ep_context_model = 1, the new cache path is 
./context_model_folder/engine_cache_folder - params2.trt_ep_context_file_path = "./context_model_folder"; + params2.trt_ep_context_file_path = "context_model_folder"; execution_provider = TensorrtExecutionProviderWithOptions(¶ms2); EXPECT_TRUE(session_object2.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); status = session_object2.Load(model_name); @@ -429,7 +449,16 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) { // "./context_model_folder/EPContextNode_test_ctx.onnx" should be created ASSERT_TRUE(HasCacheFileWithPrefix("EPContextNode_test_ctx.onnx", params2.trt_ep_context_file_path)); - // Context model inference + /* + * Test case 3: Run the dumped context model + * + * context model path = "./EP_Context_model.onnx" (created from case 1) + * + * expected result=> + * engine cache is also in the same current dirctory as "./xxxxx.engine" + * and the "ep_cache_context" attribute node of the context model should point to that. + * + */ InferenceSession session_object3{so, GetEnvironment()}; OrtTensorRTProviderOptionsV2 params3; model_name = params.trt_ep_context_file_path; @@ -447,6 +476,34 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) { // Y: 1, 3, 3, 2, 2, 2 // Z: 1, 3, 3, 2, 2, 2 RunSession(session_object3, run_options, feeds, output_names, expected_dims_mul_m, expected_values_mul_m); + + /* + * Test case 4: Run the dumped context model + * + * context model path = "./context_model_folder/EPContextNode_test_ctx.onnx" (created from case 2) + * + * expected result=> + * engine cache path is "./context_model_folder/engine_cache_folder/xxxxx.engine" + * and the "ep_cache_context" attribute node of the context model should point to that. + * + */ + InferenceSession session_object4{so, GetEnvironment()}; + OrtTensorRTProviderOptionsV2 params4; + model_name = "./context_model_folder/EPContextNode_test_ctx.onnx"; + execution_provider = TensorrtExecutionProviderWithOptions(¶ms4); + EXPECT_TRUE(session_object4.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); + status = session_object4.Load(model_name); + ASSERT_TRUE(status.IsOK()); + status = session_object4.Initialize(); + ASSERT_TRUE(status.IsOK()); + // run inference + // TRT engine will be created and cached + // TRT profile will be created and cached only for dynamic input shape + // Data in profile, + // X: 1, 3, 3, 2, 2, 2 + // Y: 1, 3, 3, 2, 2, 2 + // Z: 1, 3, 3, 2, 2, 2 + RunSession(session_object4, run_options, feeds, output_names, expected_dims_mul_m, expected_values_mul_m); } TEST(TensorrtExecutionProviderTest, TRTPluginsCustomOpTest) { From 4f78c47859b50c14dd742968bf33545d6f50d264 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 19 Jan 2024 22:39:34 +0000 Subject: [PATCH 22/25] lintrunner -a --- .../tensorrt/tensorrt_execution_provider.cc | 1 - .../providers/tensorrt/tensorrt_basic_test.cc | 18 +++++++++--------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index 23417e668f34a..f95009eb8643d 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -3016,7 +3016,6 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView } // dump EP context node model if (dump_ep_context_model_) { - // "ep_cache_context" node attribute should be a relative path to context model directory if (ep_cache_context_attr_.empty()) { 
      ep_cache_context_attr_ = std::filesystem::relative(engine_cache_path, ep_context_file_path_).string();
diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
index 69834b934b0f8..b748f3f079ba4 100644
--- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
+++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
@@ -394,16 +394,16 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) {
   std::vector<int64_t> expected_dims_mul_m = {1, 3, 2};
   std::vector<float> expected_values_mul_m = {3.0f, 6.0f, 9.0f, 12.0f, 15.0f, 18.0f};
 
-  /* 
+  /*
    * Test case 1: Dump context model
-   * 
+   *
    * provider options=>
    * trt_ep_context_file_path = "EP_Context_model.onnx"
-   * 
+   *
    * expected result =>
    * context model "EP_Context_model.onnx" should be created in current directory
-   * 
-   */ 
+   *
+   */
   OrtTensorRTProviderOptionsV2 params;
   params.trt_engine_cache_enable = 1;
   params.trt_dump_ep_context_model = 1;
@@ -423,11 +423,11 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) {
    * trt_engine_cache_prefix = "TRT_engine_cache"
    * trt_ep_context_file_path = "context_model_folder"
    * trt_engine_cache_path = "engine_cache_folder"
-   * 
+   *
    * expected result =>
    * engine cache "./context_model_folder/engine_cache_folder/TRT_engine_cache...engine" should be created
    * context model "./context_model_folder/EPContextNode_test_ctx.onnx" should be created
-   */ 
+   */
   InferenceSession session_object2{so, GetEnvironment()};
   OrtTensorRTProviderOptionsV2 params2;
   params2.trt_engine_cache_enable = 1;
@@ -453,7 +453,7 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) {
    * Test case 3: Run the dumped context model
    *
    * context model path = "./EP_Context_model.onnx" (created from case 1)
-   * 
+   *
    * expected result=>
    * engine cache is also in the same current directory as "./xxxxx.engine"
    * and the "ep_cache_context" attribute node of the context model should point to that.
@@ -481,7 +481,7 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) {
    * Test case 4: Run the dumped context model
    *
    * context model path = "./context_model_folder/EPContextNode_test_ctx.onnx" (created from case 2)
-   * 
+   *
    * expected result=>
    * engine cache path is "./context_model_folder/engine_cache_folder/xxxxx.engine"
    * and the "ep_cache_context" attribute node of the context model should point to that.
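For readers following the EPContextNode test cases above, the same dump-then-load flow looks roughly like this from an application's point of view. This is a minimal sketch, not code from the PR: it assumes the OrtTensorRTProviderOptionsV2 struct definition and the C++ AppendExecutionProvider_TensorRT_V2 helper are visible to the application (as they are to the unit tests), and the model and file names are placeholders borrowed from the tests.

```cpp
#include <onnxruntime_cxx_api.h>

int main() {
  Ort::Env env;

  // Step 1: compile the original model once and dump an EP context model
  // (mirrors test case 1: trt_ep_context_file_path names the output file).
  OrtTensorRTProviderOptionsV2 dump_opts{};
  dump_opts.trt_engine_cache_enable = 1;
  dump_opts.trt_dump_ep_context_model = 1;
  dump_opts.trt_ep_context_file_path = "EP_Context_model.onnx";  // placeholder name

  Ort::SessionOptions so_dump;
  so_dump.AppendExecutionProvider_TensorRT_V2(dump_opts);
  Ort::Session dump_session(env, ORT_TSTR("EPContextNode_test.onnx"), so_dump);  // placeholder model

  // Step 2: later runs load the dumped context model directly; its
  // "ep_cache_context" node attribute locates the engine cache relative
  // to the context model (mirrors test case 3).
  OrtTensorRTProviderOptionsV2 run_opts{};
  run_opts.trt_engine_cache_enable = 1;

  Ort::SessionOptions so_run;
  so_run.AppendExecutionProvider_TensorRT_V2(run_opts);
  Ort::Session run_session(env, ORT_TSTR("EP_Context_model.onnx"), so_run);
  return 0;
}
```
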
From da1207f4018204091b140592df24a00a66982528 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Sat, 20 Jan 2024 00:19:51 +0000
Subject: [PATCH 23/25] fix bug for unit test

---
 .../providers/tensorrt/tensorrt_basic_test.cc | 38 +++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
index b748f3f079ba4..73e0cf59d198c 100644
--- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
+++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
@@ -462,6 +462,7 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) {
   InferenceSession session_object3{so, GetEnvironment()};
   OrtTensorRTProviderOptionsV2 params3;
   model_name = params.trt_ep_context_file_path;
+  params3.trt_engine_cache_enable = 1;
   execution_provider = TensorrtExecutionProviderWithOptions(&params3);
   EXPECT_TRUE(session_object3.RegisterExecutionProvider(std::move(execution_provider)).IsOK());
   status = session_object3.Load(model_name);
@@ -504,6 +505,43 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) {
   // Y: 1, 3, 3, 2, 2, 2
   // Z: 1, 3, 3, 2, 2, 2
   RunSession(session_object4, run_options, feeds, output_names, expected_dims_mul_m, expected_values_mul_m);
+
+  /*
+   * Test case 5: Dump context model with embed_mode = 1
+   */
+  InferenceSession session_object5{so, GetEnvironment()};
+  OrtTensorRTProviderOptionsV2 params5;
+  params5.trt_dump_ep_context_model = 1;
+  params5.trt_ep_context_embed_mode = 1;
+  params5.trt_ep_context_file_path = "EP_Context_model_2.onnx";
+  model_name = "EPContextNode_test.onnx";
+  execution_provider = TensorrtExecutionProviderWithOptions(&params5);
+  EXPECT_TRUE(session_object5.RegisterExecutionProvider(std::move(execution_provider)).IsOK());
+  status = session_object5.Load(model_name);
+  ASSERT_TRUE(status.IsOK());
+  status = session_object5.Initialize();
+  ASSERT_TRUE(status.IsOK());
+
+  /*
+   * Test case 6: Run context model with embed_mode = 1 (created from case 5)
+   */
+  InferenceSession session_object6{so, GetEnvironment()};
+  OrtTensorRTProviderOptionsV2 params6;
+  model_name = params5.trt_ep_context_file_path;
+  execution_provider = TensorrtExecutionProviderWithOptions(&params6);
+  EXPECT_TRUE(session_object6.RegisterExecutionProvider(std::move(execution_provider)).IsOK());
+  status = session_object6.Load(model_name);
+  ASSERT_TRUE(status.IsOK());
+  status = session_object6.Initialize();
+  ASSERT_TRUE(status.IsOK());
+  // run inference
+  // TRT engine will be created and cached
+  // TRT profile will be created and cached only for dynamic input shape
+  // Data in profile,
+  // X: 1, 3, 3, 2, 2, 2
+  // Y: 1, 3, 3, 2, 2, 2
+  // Z: 1, 3, 3, 2, 2, 2
+  RunSession(session_object6, run_options, feeds, output_names, expected_dims_mul_m, expected_values_mul_m);
 }
 
 TEST(TensorrtExecutionProviderTest, TRTPluginsCustomOpTest) {

From ec7c8f3a270c32acb8b8f3d1e42e306026868208 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Sat, 20 Jan 2024 06:56:06 +0000
Subject: [PATCH 24/25] handle relative path for 'ep_cache_context' node attribute

---
 .../providers/tensorrt/tensorrt_execution_provider.cc | 10 ++++++++--
 .../providers/tensorrt/tensorrt_execution_provider.h  |  1 +
 .../test/providers/tensorrt/tensorrt_basic_test.cc    |  1 +
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
index f95009eb8643d..fe6b959b962de 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
@@ -1599,6 +1599,10 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
       LOGS_DEFAULT(ERROR) << "In the case of dumping context model and for security purpose, The trt_engine_cache_path has '..', it's not allowed to point outside the directory.";
     }
 
+    // Engine cache relative path to context model directory.
+    // It's used when dumping the "ep_cache_context" node attribute.
+    engine_cache_relative_path_to_context_model_dir = cache_path_;
+
     // Make cache_path_ to be the relative path of ep_context_file_path_
     cache_path_ = GetPathOrParentPathOfCtxModel(ep_context_file_path_).append(cache_path_).string();
   }
@@ -3018,7 +3022,8 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView
   if (dump_ep_context_model_) {
     // "ep_cache_context" node attribute should be a relative path to context model directory
     if (ep_cache_context_attr_.empty()) {
-      ep_cache_context_attr_ = std::filesystem::relative(engine_cache_path, ep_context_file_path_).string();
+      auto cache_file_name = std::filesystem::path(engine_cache_path).filename();
+      ep_cache_context_attr_ = std::filesystem::path(engine_cache_relative_path_to_context_model_dir).append(cache_file_name.string()).string();
     }
 
     std::unique_ptr<ONNX_NAMESPACE::ModelProto> model_proto{CreateCtxModel(graph_body_viewer,
@@ -3090,7 +3095,8 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView
   if (dump_ep_context_model_ && has_dynamic_shape) {
     // "ep_cache_context" node attribute should be a relative path to context model directory
     if (ep_cache_context_attr_.empty()) {
-      ep_cache_context_attr_ = std::filesystem::relative(engine_cache_path, ep_context_file_path_).string();
+      auto cache_file_name = std::filesystem::path(engine_cache_path).filename();
+      ep_cache_context_attr_ = std::filesystem::path(engine_cache_relative_path_to_context_model_dir).append(cache_file_name.string()).string();
     }
     model_proto_.reset(CreateCtxModel(graph_body_viewer,
                                       ep_cache_context_attr_,
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
index 70b71aa221eef..ad2d2c55c67e1 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
@@ -305,6 +305,7 @@ class TensorrtExecutionProvider : public IExecutionProvider {
   int ep_context_embed_mode_ = 0;
   std::string ctx_model_path_;
   std::string ep_cache_context_attr_;
+  std::string engine_cache_relative_path_to_context_model_dir;
   std::unique_ptr<ONNX_NAMESPACE::ModelProto> model_proto_ = ONNX_NAMESPACE::ModelProto::Create();
 
   std::unordered_set<std::string> control_flow_op_set_ = {"If", "Loop", "Scan"};
diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
index 73e0cf59d198c..ff95d6e2c235c 100644
--- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
+++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
@@ -527,6 +527,7 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) {
    */
   InferenceSession session_object6{so, GetEnvironment()};
   OrtTensorRTProviderOptionsV2 params6;
+  params6.trt_ep_context_embed_mode = 1;
   model_name = params5.trt_ep_context_file_path;
   execution_provider = TensorrtExecutionProviderWithOptions(&params6);
   EXPECT_TRUE(session_object6.RegisterExecutionProvider(std::move(execution_provider)).IsOK());
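Concretely, this patch composes the "ep_cache_context" attribute from the engine cache directory (kept relative to the context model directory) plus the engine file name, which the comment fix in the next patch spells out. A small self-contained sketch of that composition, using the folder names from test cases 2 and 4 (the engine file name is a placeholder, and the final resolution step paraphrases the intent described in the test comments rather than quoting EP code):

```cpp
#include <filesystem>
#include <iostream>
#include <string>

int main() {
  // Folder names taken from test cases 2 and 4; the engine file name is a placeholder.
  std::string context_model_dir = "context_model_folder";         // trt_ep_context_file_path
  std::string cache_dir_relative_to_ctx = "engine_cache_folder";  // trt_engine_cache_path before re-rooting
  std::string engine_cache_path = context_model_dir + "/engine_cache_folder/TRT_engine_cache_xxxxx.engine";

  // What the EP stores in the "ep_cache_context" node attribute after this patch:
  // cache directory relative to the context model directory plus the engine file name.
  auto cache_file_name = std::filesystem::path(engine_cache_path).filename();
  auto ep_cache_context_attr =
      std::filesystem::path(cache_dir_relative_to_ctx).append(cache_file_name.string()).string();
  std::cout << ep_cache_context_attr << "\n";  // engine_cache_folder/TRT_engine_cache_xxxxx.engine (separator is platform-dependent)

  // At load time the attribute is resolved against the context model's directory.
  auto resolved = std::filesystem::path(context_model_dir) / ep_cache_context_attr;
  std::cout << resolved.string() << "\n";      // context_model_folder/engine_cache_folder/TRT_engine_cache_xxxxx.engine
  return 0;
}
```
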
From dccb2da2fab833337da9e192a0738f968e1907bc Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Sun, 21 Jan 2024 00:12:42 +0000
Subject: [PATCH 25/25] update unit test comment

---
 onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
index ff95d6e2c235c..4d2538c947dcc 100644
--- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
+++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
@@ -485,7 +485,7 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) {
    *
    * expected result=>
    * engine cache path is "./context_model_folder/engine_cache_folder/xxxxx.engine"
-   * and the "ep_cache_context" attribute node of the context model should point to that.
+   * and the "ep_cache_context" attribute node of the context model should point to "engine_cache_folder/xxxxx.engine".
    *
    */
   InferenceSession session_object4{so, GetEnvironment()};