From 364b1929f6b230e6e56177e9cbb74a74cae8e6a2 Mon Sep 17 00:00:00 2001 From: yf711 Date: Mon, 9 Oct 2023 13:38:55 -0700 Subject: [PATCH] Revert "[TensorRT EP] Refactor OrtTensorRTProviderOptions initialization and make it easy to add new field (#17617)" This reverts commit 569876fb16cce5165035cca755c9e5ca2c86dcc6. --- .../tensorrt/tensorrt_provider_factory.h | 14 ++ .../tensorrt/tensorrt_provider_options.h | 68 +++--- .../core/session/onnxruntime_c_api.h | 8 - ...ai_onnxruntime_OrtSession_SessionOptions.c | 1 + js/node/src/session_options_helper.cc | 1 + .../tensorrt_execution_provider_custom_ops.cc | 83 +++---- .../tensorrt_execution_provider_custom_ops.h | 1 - .../tensorrt_execution_provider_info.cc | 207 ------------------ .../tensorrt_execution_provider_info.h | 1 - .../tensorrt/tensorrt_provider_factory.cc | 199 ++++++++++++++--- .../tensorrt/tensorrt_provider_factory.h | 17 -- .../core/session/provider_bridge_ort.cc | 60 ++--- .../onnxruntime_inference_collection.py | 10 - .../python/onnxruntime_pybind_state.cc | 72 +++--- .../python/onnxruntime_pybind_state_common.h | 7 - onnxruntime/test/providers/cpu/model_tests.cc | 6 +- .../providers/tensorrt/tensorrt_basic_test.cc | 144 +++++++++++- onnxruntime/test/util/include/providers.h | 3 + tools/ci_build/gen_def.py | 14 +- ...acts-package-and-publish-steps-windows.yml | 1 + .../github/linux/copy_strip_binary.sh | 1 + .../linux/extract_and_bundle_gpu_package.sh | 1 + .../bundle_nuget_with_native_headers.bat | 1 + .../nuget/generate_nuspec_for_native_nuget.py | 9 +- tools/nuget/validate_package.py | 1 + 25 files changed, 485 insertions(+), 445 deletions(-) create mode 100644 include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h delete mode 100644 onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h new file mode 100644 index 0000000000000..44debc901cb77 --- /dev/null +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h @@ -0,0 +1,14 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "onnxruntime_c_api.h" + +#ifdef __cplusplus +extern "C" { +#endif + +ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_Tensorrt, _In_ OrtSessionOptions* options, int device_id); + +#ifdef __cplusplus +} +#endif diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h index 8f2b5af870506..e7d0f9f03ade9 100644 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h @@ -11,38 +11,38 @@ /// User can only get the instance of OrtTensorRTProviderOptionsV2 via CreateTensorRTProviderOptions. /// struct OrtTensorRTProviderOptionsV2 { - int device_id{0}; // cuda device id. - int has_user_compute_stream{0}; // indicator of user specified CUDA compute stream. - void* user_compute_stream{nullptr}; // user specified CUDA compute stream. - int trt_max_partition_iterations{1000}; // maximum iterations for TensorRT parser to get capability - int trt_min_subgraph_size{1}; // minimum size of TensorRT subgraphs - size_t trt_max_workspace_size{1 << 30}; // maximum workspace size for TensorRT. - int trt_fp16_enable{0}; // enable TensorRT FP16 precision. 
Default 0 = false, nonzero = true - int trt_int8_enable{0}; // enable TensorRT INT8 precision. Default 0 = false, nonzero = true - const char* trt_int8_calibration_table_name{nullptr}; // TensorRT INT8 calibration table name. - int trt_int8_use_native_calibration_table{0}; // use native TensorRT generated calibration table. Default 0 = false, nonzero = true - int trt_dla_enable{0}; // enable DLA. Default 0 = false, nonzero = true - int trt_dla_core{0}; // DLA core number. Default 0 - int trt_dump_subgraphs{0}; // dump TRT subgraph. Default 0 = false, nonzero = true - int trt_engine_cache_enable{0}; // enable engine caching. Default 0 = false, nonzero = true - const char* trt_engine_cache_path{nullptr}; // specify engine cache path - int trt_engine_decryption_enable{0}; // enable engine decryption. Default 0 = false, nonzero = true - const char* trt_engine_decryption_lib_path{nullptr}; // specify engine decryption library path - int trt_force_sequential_engine_build{0}; // force building TensorRT engine sequentially. Default 0 = false, nonzero = true - int trt_context_memory_sharing_enable{0}; // enable context memory sharing between subgraphs. Default 0 = false, nonzero = true - int trt_layer_norm_fp32_fallback{0}; // force Pow + Reduce ops in layer norm to FP32. Default 0 = false, nonzero = true - int trt_timing_cache_enable{0}; // enable TensorRT timing cache. Default 0 = false, nonzero = true - int trt_force_timing_cache{0}; // force the TensorRT cache to be used even if device profile does not match. Default 0 = false, nonzero = true - int trt_detailed_build_log{0}; // Enable detailed build step logging on TensorRT EP with timing for each engine build. Default 0 = false, nonzero = true - int trt_build_heuristics_enable{0}; // Build engine using heuristics to reduce build time. Default 0 = false, nonzero = true - int trt_sparsity_enable{0}; // Control if sparsity can be used by TRT. Default 0 = false, 1 = true - int trt_builder_optimization_level{3}; // Set the builder optimization level. WARNING: levels below 3 do not guarantee good engine performance, but greatly improve build time. Default 3, valid range [0-5] - int trt_auxiliary_streams{-1}; // Set maximum number of auxiliary streams per inference stream. Setting this value to 0 will lead to optimal memory usage. Default -1 = heuristics - const char* trt_tactic_sources{nullptr}; // pecify the tactics to be used by adding (+) or removing (-) tactics from the default - // tactic sources (default = all available tactics) e.g. "-CUDNN,+CUBLAS" available keys: "CUBLAS"|"CUBLAS_LT"|"CUDNN"|"EDGE_MASK_CONVOLUTIONS" - const char* trt_extra_plugin_lib_paths{nullptr}; // specify extra TensorRT plugin library paths - const char* trt_profile_min_shapes{nullptr}; // Specify the range of the input shapes to build the engine with - const char* trt_profile_max_shapes{nullptr}; // Specify the range of the input shapes to build the engine with - const char* trt_profile_opt_shapes{nullptr}; // Specify the range of the input shapes to build the engine with - int trt_cuda_graph_enable{0}; // Enable CUDA graph in ORT TRT + int device_id; // cuda device id. + int has_user_compute_stream; // indicator of user specified CUDA compute stream. + void* user_compute_stream; // user specified CUDA compute stream. + int trt_max_partition_iterations; // maximum iterations for TensorRT parser to get capability + int trt_min_subgraph_size; // minimum size of TensorRT subgraphs + size_t trt_max_workspace_size; // maximum workspace size for TensorRT. 
+ int trt_fp16_enable; // enable TensorRT FP16 precision. Default 0 = false, nonzero = true + int trt_int8_enable; // enable TensorRT INT8 precision. Default 0 = false, nonzero = true + const char* trt_int8_calibration_table_name; // TensorRT INT8 calibration table name. + int trt_int8_use_native_calibration_table; // use native TensorRT generated calibration table. Default 0 = false, nonzero = true + int trt_dla_enable; // enable DLA. Default 0 = false, nonzero = true + int trt_dla_core; // DLA core number. Default 0 + int trt_dump_subgraphs; // dump TRT subgraph. Default 0 = false, nonzero = true + int trt_engine_cache_enable; // enable engine caching. Default 0 = false, nonzero = true + const char* trt_engine_cache_path; // specify engine cache path + int trt_engine_decryption_enable; // enable engine decryption. Default 0 = false, nonzero = true + const char* trt_engine_decryption_lib_path; // specify engine decryption library path + int trt_force_sequential_engine_build; // force building TensorRT engine sequentially. Default 0 = false, nonzero = true + int trt_context_memory_sharing_enable; // enable context memory sharing between subgraphs. Default 0 = false, nonzero = true + int trt_layer_norm_fp32_fallback; // force Pow + Reduce ops in layer norm to FP32. Default 0 = false, nonzero = true + int trt_timing_cache_enable; // enable TensorRT timing cache. Default 0 = false, nonzero = true + int trt_force_timing_cache; // force the TensorRT cache to be used even if device profile does not match. Default 0 = false, nonzero = true + int trt_detailed_build_log; // Enable detailed build step logging on TensorRT EP with timing for each engine build. Default 0 = false, nonzero = true + int trt_build_heuristics_enable; // Build engine using heuristics to reduce build time. Default 0 = false, nonzero = true + int trt_sparsity_enable; // Control if sparsity can be used by TRT. Default 0 = false, 1 = true + int trt_builder_optimization_level; // Set the builder optimization level. WARNING: levels below 3 do not guarantee good engine performance, but greatly improve build time. Default 3, valid range [0-5] + int trt_auxiliary_streams; // Set maximum number of auxiliary streams per inference stream. Setting this value to 0 will lead to optimal memory usage. Default -1 = heuristics + const char* trt_tactic_sources; // pecify the tactics to be used by adding (+) or removing (-) tactics from the default + // tactic sources (default = all available tactics) e.g. 
"-CUDNN,+CUBLAS" available keys: "CUBLAS"|"CUBLAS_LT"|"CUDNN"|"EDGE_MASK_CONVOLUTIONS" + const char* trt_extra_plugin_lib_paths; // specify extra TensorRT plugin library paths + const char* trt_profile_min_shapes; // Specify the range of the input shapes to build the engine with + const char* trt_profile_max_shapes; // Specify the range of the input shapes to build the engine with + const char* trt_profile_opt_shapes; // Specify the range of the input shapes to build the engine with + int trt_cuda_graph_enable; // Enable CUDA graph in ORT TRT }; diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 8393978120489..486e2ff2b90a2 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -4572,14 +4572,6 @@ ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_MIGraphX, _In_ OrtSessio */ ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_Dnnl, _In_ OrtSessionOptions* options, int use_arena); -/* - * This is the old way to add the TensorRT provider to the session, please use SessionOptionsAppendExecutionProvider_TensorRT_V2 above to access the latest functionality - * This function always exists, but will only succeed if Onnxruntime was built with TensorRT support and the TensorRT provider shared library exists - * - * \param device_id CUDA device id, starts from zero. - */ -ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_Tensorrt, _In_ OrtSessionOptions* options, int device_id); - #ifdef __cplusplus } #endif diff --git a/java/src/main/native/ai_onnxruntime_OrtSession_SessionOptions.c b/java/src/main/native/ai_onnxruntime_OrtSession_SessionOptions.c index 3a1c0d1bb8fa1..d3239c7442c80 100644 --- a/java/src/main/native/ai_onnxruntime_OrtSession_SessionOptions.c +++ b/java/src/main/native/ai_onnxruntime_OrtSession_SessionOptions.c @@ -19,6 +19,7 @@ #include "onnxruntime/core/providers/nnapi/nnapi_provider_factory.h" #include "onnxruntime/core/providers/tvm/tvm_provider_factory.h" #include "onnxruntime/core/providers/openvino/openvino_provider_factory.h" +#include "onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h" #include "onnxruntime/core/providers/acl/acl_provider_factory.h" #include "onnxruntime/core/providers/armnn/armnn_provider_factory.h" #include "onnxruntime/core/providers/coreml/coreml_provider_factory.h" diff --git a/js/node/src/session_options_helper.cc b/js/node/src/session_options_helper.cc index a0de832d87fe5..70e63da7cefa7 100644 --- a/js/node/src/session_options_helper.cc +++ b/js/node/src/session_options_helper.cc @@ -16,6 +16,7 @@ #include "core/providers/dml/dml_provider_factory.h" #endif #ifdef USE_TENSORRT +#include "core/providers/tensorrt/tensorrt_provider_factory.h" #include "core/providers/tensorrt/tensorrt_provider_options.h" #endif #ifdef USE_COREML diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.cc index 4e466a5d568a6..54a4d16e4eaf7 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.cc @@ -26,16 +26,27 @@ extern TensorrtLogger& GetTensorrtLogger(); * Note: Current TRT plugin doesn't have APIs to get number of inputs/outputs of the plugin. * So, TensorRTCustomOp uses variadic inputs/outputs to pass ONNX graph validation. 
*/ -common::Status CreateTensorRTCustomOpDomainList(std::vector& domain_list, const std::string extra_plugin_lib_paths) { +common::Status CreateTensorRTCustomOpDomainList(TensorrtExecutionProviderInfo& info) { std::unique_ptr custom_op_domain = std::make_unique(); custom_op_domain->domain_ = "trt.plugins"; // Load any extra TRT plugin library if any. // When the TRT plugin library is loaded, the global static object is created and the plugin is registered to TRT registry. // This is done through macro, for example, REGISTER_TENSORRT_PLUGIN(VisionTransformerPluginCreator). + std::string extra_plugin_lib_paths{""}; + if (info.has_trt_options) { + if (!info.extra_plugin_lib_paths.empty()) { + extra_plugin_lib_paths = info.extra_plugin_lib_paths; + } + } else { + const std::string extra_plugin_lib_paths_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kExtraPluginLibPaths); + if (!extra_plugin_lib_paths_env.empty()) { + extra_plugin_lib_paths = extra_plugin_lib_paths_env; + } + } + // extra_plugin_lib_paths has the format of "path_1;path_2....;path_n" - static bool is_loaded = false; - if (!extra_plugin_lib_paths.empty() && !is_loaded) { + if (!extra_plugin_lib_paths.empty()) { std::stringstream extra_plugin_libs(extra_plugin_lib_paths); std::string lib; while (std::getline(extra_plugin_libs, lib, ';')) { @@ -46,59 +57,35 @@ common::Status CreateTensorRTCustomOpDomainList(std::vector& LOGS_DEFAULT(WARNING) << "[TensorRT EP]" << status.ToString(); } } - is_loaded = true; } - try { - // Get all registered TRT plugins from registry - LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Getting all registered TRT plugins from TRT plugin registry ..."; - TensorrtLogger trt_logger = GetTensorrtLogger(); - initLibNvInferPlugins(&trt_logger, ""); - - int num_plugin_creator = 0; - auto plugin_creators = getPluginRegistry()->getPluginCreatorList(&num_plugin_creator); - std::unordered_set registered_plugin_names; + // Get all registered TRT plugins from registry + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Getting all registered TRT plugins from TRT plugin registry ..."; + TensorrtLogger trt_logger = GetTensorrtLogger(); + initLibNvInferPlugins(&trt_logger, ""); - for (int i = 0; i < num_plugin_creator; i++) { - auto plugin_creator = plugin_creators[i]; - std::string plugin_name(plugin_creator->getPluginName()); - LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] " << plugin_name << ", version : " << plugin_creator->getPluginVersion(); + int num_plugin_creator = 0; + auto plugin_creators = getPluginRegistry()->getPluginCreatorList(&num_plugin_creator); + std::unordered_set registered_plugin_names; - // plugin has different versions and we only register once - if (registered_plugin_names.find(plugin_name) != registered_plugin_names.end()) { - continue; - } + for (int i = 0; i < num_plugin_creator; i++) { + auto plugin_creator = plugin_creators[i]; + std::string plugin_name(plugin_creator->getPluginName()); + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] " << plugin_name << ", version : " << plugin_creator->getPluginVersion(); - std::unique_ptr trt_custom_op = std::make_unique(onnxruntime::kTensorrtExecutionProvider, nullptr); - trt_custom_op->SetName(plugin_creator->getPluginName()); - custom_op_domain->custom_ops_.push_back(trt_custom_op.release()); - registered_plugin_names.insert(plugin_name); + // plugin has different versions and we only register once + if (registered_plugin_names.find(plugin_name) != registered_plugin_names.end()) { + continue; } - domain_list.push_back(custom_op_domain.release()); - } catch (const 
std::exception&) { - LOGS_DEFAULT(WARNING) << "[TensorRT EP] Failed to get TRT plugins from TRT plugin registration. Therefore, TRT EP can't create custom ops for TRT plugins"; - } - return Status::OK(); -} -common::Status CreateTensorRTCustomOpDomainList(TensorrtExecutionProviderInfo& info) { - std::vector domain_list; - std::string extra_plugin_lib_paths{""}; - if (info.has_trt_options) { - if (!info.extra_plugin_lib_paths.empty()) { - extra_plugin_lib_paths = info.extra_plugin_lib_paths; - } - } else { - const std::string extra_plugin_lib_paths_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kExtraPluginLibPaths); - if (!extra_plugin_lib_paths_env.empty()) { - extra_plugin_lib_paths = extra_plugin_lib_paths_env; - } - } - auto status = CreateTensorRTCustomOpDomainList(domain_list, extra_plugin_lib_paths); - if (!domain_list.empty()) { - info.custom_op_domain_list = domain_list; + std::unique_ptr trt_custom_op = std::make_unique(onnxruntime::kTensorrtExecutionProvider, nullptr); + trt_custom_op->SetName(plugin_creator->getPluginName()); + custom_op_domain->custom_ops_.push_back(trt_custom_op.release()); + registered_plugin_names.insert(plugin_name); } - return Status::OK(); + info.custom_op_domain_list.push_back(custom_op_domain.release()); + + return common::Status::OK(); } void ReleaseTensorRTCustomOpDomain(OrtCustomOpDomain* domain) { diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.h index 35bd38d818979..98ac3220abffd 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.h @@ -13,7 +13,6 @@ using namespace onnxruntime; namespace onnxruntime { common::Status LoadDynamicLibrary(onnxruntime::PathString library_name); -common::Status CreateTensorRTCustomOpDomainList(std::vector& domain_list, const std::string extra_plugin_lib_paths); common::Status CreateTensorRTCustomOpDomainList(TensorrtExecutionProviderInfo& info); void ReleaseTensorRTCustomOpDomain(OrtCustomOpDomain* domain); void ReleaseTensorRTCustomOpDomainList(std::vector& custom_op_domain_list); diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc index cb7a568d09130..515fc1c62cff1 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc @@ -186,211 +186,4 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const OrtTensor }; return options; } - -/** - * Update OrtTensorRTProviderOptionsV2 instance with ProviderOptions (map of string-based key-value pairs) - * - * Please note that it will reset the OrtTensorRTProviderOptionsV2 instance first and then set up the provided provider options - * See TensorrtExecutionProviderInfo::FromProviderOptions() for more details. This function will be called by the C API UpdateTensorRTProviderOptions() also. - * - * \param provider_options - a pointer to OrtTensorRTProviderOptionsV2 instance - * \param options - a reference to ProviderOptions instance - * \param string_copy - if it's true, it uses strncpy() to copy 'provider option' string from ProviderOptions instance to where the 'provider option' const char pointer in OrtTensorRTProviderOptionsV2 instance points to. - * it it's false, it only saves the pointer and no strncpy(). 
- * - * Note: If there is strncpy involved, please remember to deallocate or simply call C API ReleaseTensorRTProviderOptions. - */ -void TensorrtExecutionProviderInfo::UpdateProviderOptions(void* provider_options, const ProviderOptions& options, bool string_copy) { - if (provider_options == nullptr) { - return; - } - TensorrtExecutionProviderInfo internal_options = onnxruntime::TensorrtExecutionProviderInfo::FromProviderOptions(options); - auto& trt_provider_options_v2 = *reinterpret_cast(provider_options); - trt_provider_options_v2.device_id = internal_options.device_id; - - // The 'has_user_compute_stream' of the OrtTensorRTProviderOptionsV2 instance can be set by C API UpdateTensorRTProviderOptionsWithValue() as well - // We only set the 'has_user_compute_stream' of the OrtTensorRTProviderOptionsV2 instance if it is provided in options - if (options.find("has_user_compute_stream") != options.end()) { - trt_provider_options_v2.has_user_compute_stream = internal_options.has_user_compute_stream; - } - - trt_provider_options_v2.trt_max_partition_iterations = internal_options.max_partition_iterations; - trt_provider_options_v2.trt_min_subgraph_size = internal_options.min_subgraph_size; - trt_provider_options_v2.trt_max_workspace_size = internal_options.max_workspace_size; - trt_provider_options_v2.trt_fp16_enable = internal_options.fp16_enable; - trt_provider_options_v2.trt_int8_enable = internal_options.int8_enable; - - if (string_copy) { - char* dest = nullptr; - auto str_size = internal_options.int8_calibration_table_name.size(); - if (str_size == 0) { - trt_provider_options_v2.trt_int8_calibration_table_name = nullptr; - } else { - dest = new char[str_size + 1]; -#ifdef _MSC_VER - strncpy_s(dest, str_size + 1, internal_options.int8_calibration_table_name.c_str(), str_size); -#else - strncpy(dest, internal_options.int8_calibration_table_name.c_str(), str_size); -#endif - dest[str_size] = '\0'; - trt_provider_options_v2.trt_int8_calibration_table_name = (const char*)dest; - } - } else { - trt_provider_options_v2.trt_int8_calibration_table_name = internal_options.int8_calibration_table_name.c_str(); - } - - trt_provider_options_v2.trt_int8_use_native_calibration_table = internal_options.int8_use_native_calibration_table; - trt_provider_options_v2.trt_dla_enable = internal_options.dla_enable; - trt_provider_options_v2.trt_dla_core = internal_options.dla_core; - trt_provider_options_v2.trt_dump_subgraphs = internal_options.dump_subgraphs; - trt_provider_options_v2.trt_engine_cache_enable = internal_options.engine_cache_enable; - - if (string_copy) { - char* dest = nullptr; - auto str_size = internal_options.engine_cache_path.size(); - if (str_size == 0) { - trt_provider_options_v2.trt_engine_cache_path = nullptr; - } else { - dest = new char[str_size + 1]; -#ifdef _MSC_VER - strncpy_s(dest, str_size + 1, internal_options.engine_cache_path.c_str(), str_size); -#else - strncpy(dest, internal_options.engine_cache_path.c_str(), str_size); -#endif - dest[str_size] = '\0'; - trt_provider_options_v2.trt_engine_cache_path = (const char*)dest; - } - } else { - trt_provider_options_v2.trt_engine_cache_path = internal_options.engine_cache_path.c_str(); - } - - trt_provider_options_v2.trt_engine_decryption_enable = internal_options.engine_decryption_enable; - - if (string_copy) { - char* dest = nullptr; - auto str_size = internal_options.engine_decryption_lib_path.size(); - if (str_size == 0) { - trt_provider_options_v2.trt_engine_decryption_lib_path = nullptr; - } else { - dest = new char[str_size + 
1]; -#ifdef _MSC_VER - strncpy_s(dest, str_size + 1, internal_options.engine_decryption_lib_path.c_str(), str_size); -#else - strncpy(dest, internal_options.engine_decryption_lib_path.c_str(), str_size); -#endif - dest[str_size] = '\0'; - trt_provider_options_v2.trt_engine_decryption_lib_path = (const char*)dest; - } - } else { - trt_provider_options_v2.trt_engine_decryption_lib_path = internal_options.engine_decryption_lib_path.c_str(); - } - - trt_provider_options_v2.trt_force_sequential_engine_build = internal_options.force_sequential_engine_build; - trt_provider_options_v2.trt_context_memory_sharing_enable = internal_options.context_memory_sharing_enable; - trt_provider_options_v2.trt_layer_norm_fp32_fallback = internal_options.layer_norm_fp32_fallback; - trt_provider_options_v2.trt_timing_cache_enable = internal_options.timing_cache_enable; - trt_provider_options_v2.trt_force_timing_cache = internal_options.force_timing_cache; - trt_provider_options_v2.trt_detailed_build_log = internal_options.detailed_build_log; - trt_provider_options_v2.trt_build_heuristics_enable = internal_options.build_heuristics_enable; - trt_provider_options_v2.trt_sparsity_enable = internal_options.sparsity_enable; - trt_provider_options_v2.trt_builder_optimization_level = internal_options.builder_optimization_level; - trt_provider_options_v2.trt_auxiliary_streams = internal_options.auxiliary_streams; - - if (string_copy) { - char* dest = nullptr; - auto str_size = internal_options.tactic_sources.size(); - if (str_size == 0) { - trt_provider_options_v2.trt_tactic_sources = nullptr; - } else { - dest = new char[str_size + 1]; -#ifdef _MSC_VER - strncpy_s(dest, str_size + 1, internal_options.tactic_sources.c_str(), str_size); -#else - strncpy(dest, internal_options.tactic_sources.c_str(), str_size); -#endif - dest[str_size] = '\0'; - trt_provider_options_v2.trt_tactic_sources = (const char*)dest; - } - } else { - trt_provider_options_v2.trt_tactic_sources = internal_options.tactic_sources.c_str(); - } - - if (string_copy) { - char* dest = nullptr; - auto str_size = internal_options.extra_plugin_lib_paths.size(); - if (str_size == 0) { - trt_provider_options_v2.trt_extra_plugin_lib_paths = nullptr; - } else { - dest = new char[str_size + 1]; -#ifdef _MSC_VER - strncpy_s(dest, str_size + 1, internal_options.extra_plugin_lib_paths.c_str(), str_size); -#else - strncpy(dest, internal_options.extra_plugin_lib_paths.c_str(), str_size); -#endif - dest[str_size] = '\0'; - trt_provider_options_v2.trt_extra_plugin_lib_paths = (const char*)dest; - } - } else { - trt_provider_options_v2.trt_extra_plugin_lib_paths = internal_options.extra_plugin_lib_paths.c_str(); - } - - if (string_copy) { - char* dest = nullptr; - auto str_size = internal_options.profile_min_shapes.size(); - if (str_size == 0) { - trt_provider_options_v2.trt_profile_min_shapes = nullptr; - } else { - dest = new char[str_size + 1]; -#ifdef _MSC_VER - strncpy_s(dest, str_size + 1, internal_options.profile_min_shapes.c_str(), str_size); -#else - strncpy(dest, internal_options.profile_min_shapes.c_str(), str_size); -#endif - dest[str_size] = '\0'; - trt_provider_options_v2.trt_profile_min_shapes = (const char*)dest; - } - } else { - trt_provider_options_v2.trt_profile_min_shapes = internal_options.profile_min_shapes.c_str(); - } - - if (string_copy) { - char* dest = nullptr; - auto str_size = internal_options.profile_max_shapes.size(); - if (str_size == 0) { - trt_provider_options_v2.trt_profile_max_shapes = nullptr; - } else { - dest = new char[str_size + 1]; 
-#ifdef _MSC_VER - strncpy_s(dest, str_size + 1, internal_options.profile_max_shapes.c_str(), str_size); -#else - strncpy(dest, internal_options.profile_max_shapes.c_str(), str_size); -#endif - dest[str_size] = '\0'; - trt_provider_options_v2.trt_profile_max_shapes = (const char*)dest; - } - } else { - trt_provider_options_v2.trt_profile_max_shapes = internal_options.profile_max_shapes.c_str(); - } - - if (string_copy) { - char* dest = nullptr; - auto str_size = internal_options.profile_opt_shapes.size(); - if (str_size == 0) { - trt_provider_options_v2.trt_profile_opt_shapes = nullptr; - } else { - dest = new char[str_size + 1]; -#ifdef _MSC_VER - strncpy_s(dest, str_size + 1, internal_options.profile_opt_shapes.c_str(), str_size); -#else - strncpy(dest, internal_options.profile_opt_shapes.c_str(), str_size); -#endif - dest[str_size] = '\0'; - trt_provider_options_v2.trt_profile_opt_shapes = (const char*)dest; - } - } else { - trt_provider_options_v2.trt_profile_opt_shapes = internal_options.profile_opt_shapes.c_str(); - } - - trt_provider_options_v2.trt_cuda_graph_enable = internal_options.cuda_graph_enable; -} } // namespace onnxruntime diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h index 61a6bf08211be..4fb9837e1c040 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h @@ -54,7 +54,6 @@ struct TensorrtExecutionProviderInfo { static TensorrtExecutionProviderInfo FromProviderOptions(const ProviderOptions& options); static ProviderOptions ToProviderOptions(const TensorrtExecutionProviderInfo& info); static ProviderOptions ToProviderOptions(const OrtTensorRTProviderOptionsV2& info); - static void UpdateProviderOptions(void* provider_options, const ProviderOptions& options, bool string_copy); std::vector custom_op_domain_list; }; diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc index b5dbe1ac459b1..18ec113734b97 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc @@ -2,7 +2,7 @@ // Licensed under the MIT License. 
#include "core/providers/shared_library/provider_api.h" -#include "tensorrt_provider_factory.h" +#include "core/providers/tensorrt/tensorrt_provider_factory.h" #include #include "tensorrt_execution_provider.h" #include "tensorrt_provider_factory_creator.h" @@ -18,45 +18,22 @@ namespace onnxruntime { void InitializeRegistry(); void DeleteRegistry(); -struct ProviderInfo_TensorRT_Impl final : ProviderInfo_TensorRT { - OrtStatus* GetCurrentGpuDeviceId(_In_ int* device_id) override { - auto cuda_err = cudaGetDevice(device_id); - if (cuda_err != cudaSuccess) { - return CreateStatus(ORT_FAIL, "Failed to get device id."); - } - return nullptr; - } - - OrtStatus* UpdateProviderOptions(void* provider_options, const ProviderOptions& options, bool string_copy) override { - TensorrtExecutionProviderInfo::UpdateProviderOptions(provider_options, options, string_copy); - return nullptr; - } - - OrtStatus* GetTensorRTCustomOpDomainList(std::vector& domain_list, const std::string extra_plugin_lib_paths) override { - common::Status status = CreateTensorRTCustomOpDomainList(domain_list, extra_plugin_lib_paths); - if (!status.IsOK()) { - return CreateStatus(ORT_FAIL, "[TensorRT EP] Can't create custom ops for TRT plugins."); - } - return nullptr; - } - - OrtStatus* ReleaseCustomOpDomainList(std::vector& domain_list) override { - ReleaseTensorRTCustomOpDomainList(domain_list); - return nullptr; - } - -} g_info; - struct TensorrtProviderFactory : IExecutionProviderFactory { TensorrtProviderFactory(const TensorrtExecutionProviderInfo& info) : info_{info} {} ~TensorrtProviderFactory() override {} std::unique_ptr CreateProvider() override; + void GetCustomOpDomainList(std::vector& custom_op_domain_list); + private: TensorrtExecutionProviderInfo info_; }; +void TensorrtProviderFactory::GetCustomOpDomainList(std::vector& custom_op_domain_list) { + custom_op_domain_list = info_.custom_op_domain_list; +} + std::unique_ptr TensorrtProviderFactory::CreateProvider() { return std::make_unique(info_); } @@ -69,7 +46,6 @@ std::shared_ptr TensorrtProviderFactoryCreator::Creat } struct Tensorrt_Provider : Provider { - void* GetInfo() override { return &g_info; } std::shared_ptr CreateExecutionProviderFactory(int device_id) override { TensorrtExecutionProviderInfo info; info.device_id = device_id; @@ -79,7 +55,6 @@ struct Tensorrt_Provider : Provider { if (!status.IsOK()) { LOGS_DEFAULT(WARNING) << "[TensorRT EP] Failed to get TRT plugins from TRT plugin registration."; } - return std::make_shared(info); } @@ -129,8 +104,161 @@ struct Tensorrt_Provider : Provider { return std::make_shared(info); } + /** + * This function will be called by the C API UpdateTensorRTProviderOptions(). 
+ * + * Please note that it will reset the OrtProviderOptionsV2 instance first and then set up the provided provider options + * See TensorrtExecutionProviderInfo::FromProviderOptions() for more details + */ void UpdateProviderOptions(void* provider_options, const ProviderOptions& options) override { - TensorrtExecutionProviderInfo::UpdateProviderOptions(provider_options, options, true); + auto internal_options = onnxruntime::TensorrtExecutionProviderInfo::FromProviderOptions(options); + auto& trt_options = *reinterpret_cast(provider_options); + trt_options.device_id = internal_options.device_id; + + // The 'has_user_compute_stream' of the OrtTensorRTProviderOptionsV2 instance can be set by C API UpdateTensorRTProviderOptionsWithValue() as well + // We only set the 'has_user_compute_stream' of the OrtTensorRTProviderOptionsV2 instance if it is provided in options + if (options.find("has_user_compute_stream") != options.end()) { + trt_options.has_user_compute_stream = internal_options.has_user_compute_stream; + } + + trt_options.trt_max_partition_iterations = internal_options.max_partition_iterations; + trt_options.trt_min_subgraph_size = internal_options.min_subgraph_size; + trt_options.trt_max_workspace_size = internal_options.max_workspace_size; + trt_options.trt_fp16_enable = internal_options.fp16_enable; + trt_options.trt_int8_enable = internal_options.int8_enable; + + char* dest = nullptr; + auto str_size = internal_options.int8_calibration_table_name.size(); + if (str_size == 0) { + trt_options.trt_int8_calibration_table_name = nullptr; + } else { + dest = new char[str_size + 1]; +#ifdef _MSC_VER + strncpy_s(dest, str_size + 1, internal_options.int8_calibration_table_name.c_str(), str_size); +#else + strncpy(dest, internal_options.int8_calibration_table_name.c_str(), str_size); +#endif + dest[str_size] = '\0'; + trt_options.trt_int8_calibration_table_name = (const char*)dest; + } + + trt_options.trt_int8_use_native_calibration_table = internal_options.int8_use_native_calibration_table; + trt_options.trt_dla_enable = internal_options.dla_enable; + trt_options.trt_dla_core = internal_options.dla_core; + trt_options.trt_dump_subgraphs = internal_options.dump_subgraphs; + trt_options.trt_engine_cache_enable = internal_options.engine_cache_enable; + + str_size = internal_options.engine_cache_path.size(); + if (str_size == 0) { + trt_options.trt_engine_cache_path = nullptr; + } else { + dest = new char[str_size + 1]; +#ifdef _MSC_VER + strncpy_s(dest, str_size + 1, internal_options.engine_cache_path.c_str(), str_size); +#else + strncpy(dest, internal_options.engine_cache_path.c_str(), str_size); +#endif + dest[str_size] = '\0'; + trt_options.trt_engine_cache_path = (const char*)dest; + } + + trt_options.trt_engine_decryption_enable = internal_options.engine_decryption_enable; + + str_size = internal_options.engine_decryption_lib_path.size(); + if (str_size == 0) { + trt_options.trt_engine_decryption_lib_path = nullptr; + } else { + dest = new char[str_size + 1]; +#ifdef _MSC_VER + strncpy_s(dest, str_size + 1, internal_options.engine_decryption_lib_path.c_str(), str_size); +#else + strncpy(dest, internal_options.engine_decryption_lib_path.c_str(), str_size); +#endif + dest[str_size] = '\0'; + trt_options.trt_engine_decryption_lib_path = (const char*)dest; + } + + trt_options.trt_force_sequential_engine_build = internal_options.force_sequential_engine_build; + trt_options.trt_context_memory_sharing_enable = internal_options.context_memory_sharing_enable; + 
trt_options.trt_layer_norm_fp32_fallback = internal_options.layer_norm_fp32_fallback; + trt_options.trt_timing_cache_enable = internal_options.timing_cache_enable; + trt_options.trt_force_timing_cache = internal_options.force_timing_cache; + trt_options.trt_detailed_build_log = internal_options.detailed_build_log; + trt_options.trt_build_heuristics_enable = internal_options.build_heuristics_enable; + trt_options.trt_sparsity_enable = internal_options.sparsity_enable; + trt_options.trt_builder_optimization_level = internal_options.builder_optimization_level; + trt_options.trt_auxiliary_streams = internal_options.auxiliary_streams; + str_size = internal_options.tactic_sources.size(); + if (str_size == 0) { + trt_options.trt_tactic_sources = nullptr; + } else { + dest = new char[str_size + 1]; +#ifdef _MSC_VER + strncpy_s(dest, str_size + 1, internal_options.tactic_sources.c_str(), str_size); +#else + strncpy(dest, internal_options.tactic_sources.c_str(), str_size); +#endif + dest[str_size] = '\0'; + trt_options.trt_tactic_sources = (const char*)dest; + } + + str_size = internal_options.extra_plugin_lib_paths.size(); + if (str_size == 0) { + trt_options.trt_extra_plugin_lib_paths = nullptr; + } else { + dest = new char[str_size + 1]; +#ifdef _MSC_VER + strncpy_s(dest, str_size + 1, internal_options.extra_plugin_lib_paths.c_str(), str_size); +#else + strncpy(dest, internal_options.extra_plugin_lib_paths.c_str(), str_size); +#endif + dest[str_size] = '\0'; + trt_options.trt_extra_plugin_lib_paths = (const char*)dest; + } + + str_size = internal_options.profile_min_shapes.size(); + if (str_size == 0) { + trt_options.trt_profile_min_shapes = nullptr; + } else { + dest = new char[str_size + 1]; +#ifdef _MSC_VER + strncpy_s(dest, str_size + 1, internal_options.profile_min_shapes.c_str(), str_size); +#else + strncpy(dest, internal_options.profile_min_shapes.c_str(), str_size); +#endif + dest[str_size] = '\0'; + trt_options.trt_profile_min_shapes = (const char*)dest; + } + + str_size = internal_options.profile_max_shapes.size(); + if (str_size == 0) { + trt_options.trt_profile_max_shapes = nullptr; + } else { + dest = new char[str_size + 1]; +#ifdef _MSC_VER + strncpy_s(dest, str_size + 1, internal_options.profile_max_shapes.c_str(), str_size); +#else + strncpy(dest, internal_options.profile_max_shapes.c_str(), str_size); +#endif + dest[str_size] = '\0'; + trt_options.trt_profile_max_shapes = (const char*)dest; + } + + str_size = internal_options.profile_opt_shapes.size(); + if (str_size == 0) { + trt_options.trt_profile_opt_shapes = nullptr; + } else { + dest = new char[str_size + 1]; +#ifdef _MSC_VER + strncpy_s(dest, str_size + 1, internal_options.profile_opt_shapes.c_str(), str_size); +#else + strncpy(dest, internal_options.profile_opt_shapes.c_str(), str_size); +#endif + dest[str_size] = '\0'; + trt_options.trt_profile_opt_shapes = (const char*)dest; + } + + trt_options.trt_cuda_graph_enable = internal_options.cuda_graph_enable; } ProviderOptions GetProviderOptions(const void* provider_options) override { @@ -138,6 +266,11 @@ struct Tensorrt_Provider : Provider { return onnxruntime::TensorrtExecutionProviderInfo::ToProviderOptions(options); } + void GetCustomOpDomainList(IExecutionProviderFactory* factory, std::vector& custom_op_domains_ptr) override { + TensorrtProviderFactory* trt_factory = reinterpret_cast(factory); + trt_factory->GetCustomOpDomainList(custom_op_domains_ptr); + } + void Initialize() override { InitializeRegistry(); } diff --git 
a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h deleted file mode 100644 index 231e14e5c95f2..0000000000000 --- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#include "onnxruntime_c_api.h" -#include "core/framework/provider_options.h" - -namespace onnxruntime { -struct ProviderInfo_TensorRT { - virtual OrtStatus* GetCurrentGpuDeviceId(_In_ int* device_id) = 0; - virtual OrtStatus* UpdateProviderOptions(void* provider_options, const ProviderOptions& options, bool string_copy) = 0; - virtual OrtStatus* GetTensorRTCustomOpDomainList(std::vector& domain_list, const std::string extra_plugin_lib_paths) = 0; - virtual OrtStatus* ReleaseCustomOpDomainList(std::vector& domain_list) = 0; - - protected: - ~ProviderInfo_TensorRT() = default; // Can only be destroyed through a subclass instance -}; -} // namespace onnxruntime diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 950275c7c5a3f..bf7a3bbd9d380 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -108,8 +108,6 @@ namespace onnxruntime { ProviderInfo_CUDA* TryGetProviderInfo_CUDA(); ProviderInfo_CUDA& GetProviderInfo_CUDA(); -ProviderInfo_TensorRT* TryGetProviderInfo_TensorRT(); -ProviderInfo_TensorRT& GetProviderInfo_TensorRT(); ProviderInfo_CANN* TryGetProviderInfo_CANN(); ProviderInfo_CANN& GetProviderInfo_CANN(); ProviderInfo_Dnnl* TryGetProviderInfo_Dnnl(); @@ -1420,6 +1418,10 @@ std::shared_ptr TensorrtProviderFactoryCreator::Creat return s_library_tensorrt.Get().CreateExecutionProviderFactory(provider_options); } +void TensorrtProviderGetCustomOpDomainList(IExecutionProviderFactory* factory, std::vector& custom_op_domains_ptr) { + s_library_tensorrt.Get().GetCustomOpDomainList(factory, custom_op_domains_ptr); +} + std::shared_ptr MIGraphXProviderFactoryCreator::Create(const OrtMIGraphXProviderOptions* provider_options) { return s_library_migraphx.Get().CreateExecutionProviderFactory(provider_options); } @@ -1472,20 +1474,6 @@ ProviderInfo_OpenVINO* GetProviderInfo_OpenVINO() { return reinterpret_cast(s_library_openvino.Get().GetInfo()); } -ProviderInfo_TensorRT* TryGetProviderInfo_TensorRT() try { - return reinterpret_cast(s_library_tensorrt.Get().GetInfo()); -} catch (const std::exception& exception) { - LOGS_DEFAULT(ERROR) << exception.what(); - return nullptr; -} - -ProviderInfo_TensorRT& GetProviderInfo_TensorRT() { - if (auto* info = TryGetProviderInfo_TensorRT()) - return *info; - - ORT_THROW("TensorRT Provider not available, can't get interface for it"); -} - ProviderInfo_CUDA* TryGetProviderInfo_CUDA() try { return reinterpret_cast(s_library_cuda.Get().GetInfo()); } catch (const std::exception& exception) { @@ -1645,9 +1633,7 @@ ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_Tensorrt, _In_ OrtS options->provider_factories.push_back(factory); std::vector custom_op_domains; - std::string extra_plugin_lib_paths = onnxruntime::Env::Default().GetEnvironmentVar("trt_extra_plugin_lib_paths"); - onnxruntime::ProviderInfo_TensorRT& provider_info = onnxruntime::GetProviderInfo_TensorRT(); - provider_info.GetTensorRTCustomOpDomainList(custom_op_domains, extra_plugin_lib_paths); + TensorrtProviderGetCustomOpDomainList(factory.get(), custom_op_domains); for (auto ptr 
: custom_op_domains) { options->custom_op_domains_.push_back(ptr); } @@ -1678,8 +1664,7 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT, _In options->provider_factories.push_back(factory); std::vector custom_op_domains; - onnxruntime::ProviderInfo_TensorRT& provider_info = onnxruntime::GetProviderInfo_TensorRT(); - provider_info.GetTensorRTCustomOpDomainList(custom_op_domains, ""); + TensorrtProviderGetCustomOpDomainList(factory.get(), custom_op_domains); for (auto ptr : custom_op_domains) { options->custom_op_domains_.push_back(ptr); } @@ -1787,13 +1772,10 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT_V2, options->provider_factories.push_back(factory); std::vector custom_op_domains; - std::string extra_plugin_lib_paths = (tensorrt_options == nullptr || tensorrt_options->trt_extra_plugin_lib_paths == nullptr) ? "" : tensorrt_options->trt_extra_plugin_lib_paths; - onnxruntime::ProviderInfo_TensorRT& provider_info = onnxruntime::GetProviderInfo_TensorRT(); - provider_info.GetTensorRTCustomOpDomainList(custom_op_domains, extra_plugin_lib_paths); + TensorrtProviderGetCustomOpDomainList(factory.get(), custom_op_domains); for (auto ptr : custom_op_domains) { options->custom_op_domains_.push_back(ptr); } - return nullptr; API_IMPL_END } @@ -1802,6 +1784,34 @@ ORT_API_STATUS_IMPL(OrtApis::CreateTensorRTProviderOptions, _Outptr_ OrtTensorRT API_IMPL_BEGIN #ifdef USE_TENSORRT auto options = std::make_unique(); + options->device_id = 0; + options->has_user_compute_stream = 0; + options->user_compute_stream = nullptr; + options->trt_max_partition_iterations = 1000; + options->trt_min_subgraph_size = 1; + options->trt_max_workspace_size = 1 << 30; + options->trt_fp16_enable = false; + options->trt_int8_enable = false; + options->trt_int8_calibration_table_name = nullptr; + options->trt_int8_use_native_calibration_table = false; + options->trt_dla_enable = false; + options->trt_dla_core = false; + options->trt_dump_subgraphs = false; + options->trt_engine_cache_enable = false; + options->trt_engine_cache_path = nullptr; + options->trt_engine_decryption_enable = false; + options->trt_engine_decryption_lib_path = nullptr; + options->trt_force_sequential_engine_build = false; + options->trt_context_memory_sharing_enable = false; + options->trt_layer_norm_fp32_fallback = false; + options->trt_timing_cache_enable = false; + options->trt_force_timing_cache = false; + options->trt_detailed_build_log = false; + options->trt_extra_plugin_lib_paths = nullptr; + options->trt_profile_min_shapes = nullptr; + options->trt_profile_max_shapes = nullptr; + options->trt_profile_opt_shapes = nullptr; + options->trt_cuda_graph_enable = false; *out = options.release(); return nullptr; #else diff --git a/onnxruntime/python/onnxruntime_inference_collection.py b/onnxruntime/python/onnxruntime_inference_collection.py index 1a3e22142f80e..bcc6f15129231 100644 --- a/onnxruntime/python/onnxruntime_inference_collection.py +++ b/onnxruntime/python/onnxruntime_inference_collection.py @@ -465,9 +465,6 @@ def _create_inference_session(self, providers, provider_options, disabled_optimi ) session_options = self._sess_options if self._sess_options else C.get_default_session_options() - - self._register_ep_custom_ops(session_options, providers, provider_options) - if self._model_path: sess = C.InferenceSession(session_options, self._model_path, True, self._read_config_from_model) else: @@ -510,13 +507,6 @@ def _reset_session(self, providers, provider_options): 
self._sess_options = self._sess_options_initial self._create_inference_session(providers, provider_options) - def _register_ep_custom_ops(self, session_options, providers, provider_options): - for i in range(len(providers)): - if providers[i] == "TensorrtExecutionProvider": - C.register_tensorrt_plugins_as_custom_ops(session_options, provider_options[i]) - elif isinstance(providers[i], tuple) and providers[i][0] == "TensorrtExecutionProvider": - C.register_tensorrt_plugins_as_custom_ops(session_options, providers[i][1]) - class IOBinding: """ diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 95a8f59186ff5..907ea0ec41e23 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -430,25 +430,6 @@ const ROCMExecutionProviderInfo GetRocmExecutionProviderInfo(ProviderInfo_ROCM* } #endif -#ifdef USE_TENSORRT -void RegisterTensorRTPluginsAsCustomOps(PySessionOptions& so, const ProviderOptions& options) { - if (auto* tensorrt_provider_info = TryGetProviderInfo_TensorRT()) { - std::string trt_extra_plugin_lib_paths = ""; - const auto it = options.find("trt_extra_plugin_lib_paths"); - if (it != options.end()) { - trt_extra_plugin_lib_paths = it->second; - } - std::vector domain_list; - tensorrt_provider_info->GetTensorRTCustomOpDomainList(domain_list, trt_extra_plugin_lib_paths); - for (auto ptr : domain_list) { - so.custom_op_domains_.push_back(ptr); - } - } else { - ORT_THROW("Please install TensorRT libraries as mentioned in the GPU requirements page, make sure they're in the PATH or LD_LIBRARY_PATH, and that your GPU is supported."); - } -} -#endif - std::unique_ptr CreateExecutionProviderInstance( const SessionOptions& session_options, const std::string& type, @@ -462,14 +443,43 @@ std::unique_ptr CreateExecutionProviderInstance( // If the environment variable 'ORT_TENSORRT_UNAVAILABLE' exists, then we do not load TensorRT. This is set by _ld_preload for the manylinux case // as in that case, trying to load the library itself will result in a crash due to the way that auditwheel strips dependencies. if (Env::Default().GetEnvironmentVar("ORT_TENSORRT_UNAVAILABLE").empty()) { - // provider_options_map is just a reference to the ProviderOptionsMap instance, so it can be released anytime from application. - // So we need these std::string variables defined here as they will be kept alive for the lifetime of TRT EP and we can still access them from OrtTensorRTProviderOptionsV2 instance. - // (The reason is string copy is involved, for example params.trt_engine_cache_path = cache_path.c_str() and those std::string variable is referenced by OrtTensorRTProviderOptionsV2 instance - // and TRT EP instance, so it won't be released.) 
- std::string calibration_table, cache_path, lib_path, trt_tactic_sources, trt_extra_plugin_lib_paths, min_profile, max_profile, opt_profile; + std::string calibration_table, cache_path, lib_path, min_profile, max_profile, opt_profile; auto it = provider_options_map.find(type); if (it != provider_options_map.end()) { - OrtTensorRTProviderOptionsV2 params; + OrtTensorRTProviderOptionsV2 params{ + 0, + 0, + nullptr, + 1000, + 1, + 1 << 30, + 0, + 0, + nullptr, + 0, + 0, + 0, + 0, + 0, + nullptr, + 0, + nullptr, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 2, + -1, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + 0}; for (auto option : it->second) { if (option.first == "device_id") { if (!option.second.empty()) { @@ -656,15 +666,13 @@ std::unique_ptr CreateExecutionProviderInstance( } } else if (option.first == "trt_tactic_sources") { if (!option.second.empty()) { - trt_tactic_sources = option.second; - params.trt_tactic_sources = trt_tactic_sources.c_str(); + params.trt_tactic_sources = option.second.c_str(); } else { ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_tactic_sources' should be a string. e.g. \"-CUDNN,+CUBLAS\" available keys: \"CUBLAS\"|\"CUBLAS_LT\"|\"CUDNN\"|\"EDGE_MASK_CONVOLUTIONS\".\n"); } } else if (option.first == "trt_extra_plugin_lib_paths") { if (!option.second.empty()) { - trt_extra_plugin_lib_paths = option.second; - params.trt_extra_plugin_lib_paths = trt_extra_plugin_lib_paths.c_str(); + params.trt_extra_plugin_lib_paths = option.second.c_str(); } else { ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_extra_plugin_lib_paths' should be a path string.\n"); } @@ -1201,12 +1209,6 @@ void addGlobalMethods(py::module& m) { }); #endif -#ifdef USE_TENSORRT - m.def( - "register_tensorrt_plugins_as_custom_ops", [](PySessionOptions& so, const ProviderOptions& options) { RegisterTensorRTPluginsAsCustomOps(so, options); }, - "Register TensorRT plugins as custom ops."); -#endif - #ifdef ENABLE_ATEN m.def("register_aten_op_executor", [](const std::string& is_tensor_argument_address_str, const std::string& aten_op_executor_address_str) -> void { diff --git a/onnxruntime/python/onnxruntime_pybind_state_common.h b/onnxruntime/python/onnxruntime_pybind_state_common.h index 5bb6bcc38b6fe..18a9079b5c4f2 100644 --- a/onnxruntime/python/onnxruntime_pybind_state_common.h +++ b/onnxruntime/python/onnxruntime_pybind_state_common.h @@ -180,13 +180,6 @@ extern onnxruntime::ArenaExtendStrategy arena_extend_strategy; } // namespace onnxruntime #endif -#ifdef USE_TENSORRT -namespace onnxruntime { -ProviderInfo_TensorRT* TryGetProviderInfo_TensorRT(); -ProviderInfo_TensorRT& GetProviderInfo_TensorRT(); -} // namespace onnxruntime -#endif - #ifdef USE_CANN namespace onnxruntime { ProviderInfo_CANN* TryGetProviderInfo_CANN(); diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc index 999f04398d8dd..da906ebf76f79 100644 --- a/onnxruntime/test/providers/cpu/model_tests.cc +++ b/onnxruntime/test/providers/cpu/model_tests.cc @@ -690,7 +690,11 @@ TEST_P(ModelTest, Run) { #endif else if (provider_name == "tensorrt") { if (test_case_name.find(ORT_TSTR("FLOAT16")) != std::string::npos) { - OrtTensorRTProviderOptionsV2 params; + OrtTensorRTProviderOptionsV2 params{0, 0, nullptr, 1000, 1, 1 << 30, + 1, // enable fp16 + 0, nullptr, 0, 0, 0, 0, 0, nullptr, 0, nullptr, 0, 0, 0, 0, 0, 0, 0, 0, + 3, -1, nullptr, nullptr, nullptr, nullptr, nullptr, 0}; + ortso.AppendExecutionProvider_TensorRT_V2(params); } else { 
OrtTensorRTProviderOptionsV2* ep_option = nullptr; diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index aa96e1533653e..288cdfca2b56d 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -175,7 +175,41 @@ void RunWithOneSessionSingleThreadInference(std::string model_name, std::string std::vector expected_dims_mul_m = {1, 3, 2}; std::vector expected_values_mul_m = {3.0f, 6.0f, 9.0f, 12.0f, 15.0f, 18.0f}; - OrtTensorRTProviderOptionsV2 params; + OrtTensorRTProviderOptionsV2 params{ + 0, + 0, + nullptr, + 1000, + 1, + 1 << 30, + 0, + 0, + nullptr, + 0, + 0, + 0, + 0, + 0, + nullptr, + 0, + nullptr, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 3, + -1, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + 0}; + params.trt_engine_cache_enable = 1; std::unique_ptr execution_provider = TensorrtExecutionProviderWithOptions(¶ms); EXPECT_TRUE(session_object.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); @@ -225,7 +259,41 @@ void RunWithOneSessionMultiThreadsInference(std::string model_name, std::string std::vector expected_dims_nonzero_m = {3, 6}; std::vector expected_values_nonzero_m = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 0, 1, 0, 1, 0, 1}; - OrtTensorRTProviderOptionsV2 params; + OrtTensorRTProviderOptionsV2 params{ + 0, + 0, + nullptr, + 1000, + 1, + 1 << 30, + 0, + 0, + nullptr, + 0, + 0, + 0, + 0, + 0, + nullptr, + 0, + nullptr, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 3, + -1, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + 0}; + params.trt_engine_cache_enable = 1; std::unique_ptr execution_provider = TensorrtExecutionProviderWithOptions(¶ms); EXPECT_TRUE(session_object.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); @@ -354,7 +422,41 @@ TEST(TensorrtExecutionProviderTest, TRTPluginsCustomOpTest) { output_names.push_back("output"); std::vector fetches; - OrtTensorRTProviderOptionsV2 params; + OrtTensorRTProviderOptionsV2 params{ + 0, + 0, + nullptr, + 1000, + 1, + 1 << 30, + 0, + 0, + nullptr, + 0, + 0, + 0, + 0, + 0, + nullptr, + 0, + nullptr, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 3, + -1, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + 0}; + std::unique_ptr execution_provider = TensorrtExecutionProviderWithOptions(¶ms); EXPECT_TRUE(session_object.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); std::cout << model_name << std::endl; @@ -414,7 +516,41 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { std::vector expected_dims_mul_m = {1, 3, 2}; std::vector expected_values_mul_m = {3.0f, 6.0f, 9.0f, 12.0f, 15.0f, 18.0f}; - OrtTensorRTProviderOptionsV2 params; + OrtTensorRTProviderOptionsV2 params{ + 0, + 0, + nullptr, + 1000, + 1, + 1 << 30, + 0, + 0, + nullptr, + 0, + 0, + 0, + 0, + 0, + nullptr, + 0, + nullptr, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 3, + -1, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + 0}; + if (cache_type.compare("engine") == 0) { /* Following code block tests the functionality of engine and optimization profile of ORT TRT, including: * - engine cache serialization/de-serialization diff --git a/onnxruntime/test/util/include/providers.h b/onnxruntime/test/util/include/providers.h index aa489e6cd958b..a6420e2342d9b 100644 --- a/onnxruntime/test/util/include/providers.h +++ b/onnxruntime/test/util/include/providers.h @@ -10,6 +10,9 @@ #ifdef USE_TVM #include "core/providers/tvm/tvm_provider_factory.h" #endif +#ifdef 
USE_TENSORRT +#include "core/providers/tensorrt/tensorrt_provider_factory.h" +#endif #ifdef USE_OPENVINO #include "core/providers/openvino/openvino_provider_factory.h" #endif diff --git a/tools/ci_build/gen_def.py b/tools/ci_build/gen_def.py index b53fb33659120..9821f3a901c1a 100755 --- a/tools/ci_build/gen_def.py +++ b/tools/ci_build/gen_def.py @@ -67,19 +67,7 @@ def parse_arguments(): # external symbols are removed, xnnpack ep will be created via the standard ORT API. # https://github.com/microsoft/onnxruntime/pull/11798 - if c not in ( - "vitisai", - "winml", - "cuda", - "rocm", - "migraphx", - "qnn", - "snpe", - "xnnpack", - "cann", - "dnnl", - "tensorrt", - ): + if c not in ("vitisai", "winml", "cuda", "rocm", "migraphx", "qnn", "snpe", "xnnpack", "cann", "dnnl"): file.write(f"#include \n") file.write("void* GetFunctionEntryByName(const char* name){\n") for symbol in symbols: diff --git a/tools/ci_build/github/azure-pipelines/templates/c-api-artifacts-package-and-publish-steps-windows.yml b/tools/ci_build/github/azure-pipelines/templates/c-api-artifacts-package-and-publish-steps-windows.yml index 663ce4338c99f..07aac08dac0b1 100644 --- a/tools/ci_build/github/azure-pipelines/templates/c-api-artifacts-package-and-publish-steps-windows.yml +++ b/tools/ci_build/github/azure-pipelines/templates/c-api-artifacts-package-and-publish-steps-windows.yml @@ -58,6 +58,7 @@ steps: copy $(Build.SourcesDirectory)\include\onnxruntime\core\session\onnxruntime_*.h $(Build.BinariesDirectory)\${{parameters.artifactName}}\include copy $(Build.SourcesDirectory)\include\onnxruntime\core\framework\provider_options.h $(Build.BinariesDirectory)\${{parameters.artifactName}}\include copy $(Build.SourcesDirectory)\include\onnxruntime\core\providers\cpu\cpu_provider_factory.h $(Build.BinariesDirectory)\${{parameters.artifactName}}\include + copy $(Build.SourcesDirectory)\include\onnxruntime\core\providers\tensorrt\tensorrt_provider_factory.h $(Build.BinariesDirectory)\${{parameters.artifactName}}\include copy $(Build.SourcesDirectory)\orttraining\orttraining\training_api\include\onnxruntime_training*.h $(Build.BinariesDirectory)\${{parameters.artifactName}}\include REM copy the README, license and TPN diff --git a/tools/ci_build/github/linux/copy_strip_binary.sh b/tools/ci_build/github/linux/copy_strip_binary.sh index 73444b35a6768..63690b69fc91a 100755 --- a/tools/ci_build/github/linux/copy_strip_binary.sh +++ b/tools/ci_build/github/linux/copy_strip_binary.sh @@ -27,6 +27,7 @@ if [[ -f "$BINARY_DIR/$BUILD_CONFIG/libonnxruntime_providers_cuda.so" ]]; then fi if [[ -f "$BINARY_DIR/$BUILD_CONFIG/libonnxruntime_providers_tensorrt.so" ]]; then cp $BINARY_DIR/$BUILD_CONFIG/libonnxruntime_providers_tensorrt.so $BINARY_DIR/$ARTIFACT_NAME/lib + cp $SOURCE_DIR/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h $BINARY_DIR/$ARTIFACT_NAME/include fi if [[ -f "$BINARY_DIR/$BUILD_CONFIG/libonnxruntime_providers_rocm.so" ]]; then cp $BINARY_DIR/$BUILD_CONFIG/libonnxruntime_providers_shared.so $BINARY_DIR/$ARTIFACT_NAME/lib diff --git a/tools/ci_build/github/linux/extract_and_bundle_gpu_package.sh b/tools/ci_build/github/linux/extract_and_bundle_gpu_package.sh index 04ac0e35a6d78..9492b7bcf59a6 100755 --- a/tools/ci_build/github/linux/extract_and_bundle_gpu_package.sh +++ b/tools/ci_build/github/linux/extract_and_bundle_gpu_package.sh @@ -28,3 +28,4 @@ rm $ARTIFACT_DIR/onnxruntime-linux-x64-cuda-*.tgz cp onnxruntime-linux-x64-tensorrt/*/lib/libonnxruntime.so* onnxruntime-linux-x64-gpu/*/lib cp 
onnxruntime-linux-x64-tensorrt/*/lib/libonnxruntime_providers_tensorrt.so onnxruntime-linux-x64-gpu/*/lib cp onnxruntime-linux-x64-tensorrt/*/lib/libonnxruntime_providers_shared.so onnxruntime-linux-x64-gpu/*/lib +cp onnxruntime-linux-x64-tensorrt/*/include/*tensorrt* onnxruntime-linux-x64-gpu/*/include diff --git a/tools/ci_build/github/windows/bundle_nuget_with_native_headers.bat b/tools/ci_build/github/windows/bundle_nuget_with_native_headers.bat index bc4a799b2bf40..01d2633ae2104 100644 --- a/tools/ci_build/github/windows/bundle_nuget_with_native_headers.bat +++ b/tools/ci_build/github/windows/bundle_nuget_with_native_headers.bat @@ -7,6 +7,7 @@ FOR /R %%i IN (*.nupkg) do ( set filename=%%~ni IF NOT "!filename:~25,7!"=="Managed" ( mkdir build\native\include + copy %BUILD_SOURCESDIRECTORY%\include\onnxruntime\core\providers\tensorrt\tensorrt_provider_factory.h build\native\include\tensorrt_provider_factory.h 7z a %%~ni.nupkg build ) ) diff --git a/tools/nuget/generate_nuspec_for_native_nuget.py b/tools/nuget/generate_nuspec_for_native_nuget.py index cc27cdc293646..2aefe794db2f5 100644 --- a/tools/nuget/generate_nuspec_for_native_nuget.py +++ b/tools/nuget/generate_nuspec_for_native_nuget.py @@ -437,7 +437,14 @@ def generate_files(line_list, args): ) if args.execution_provider == "tensorrt": - files_list.append("') + files_list.append( + "' + ) if args.execution_provider == "dnnl": files_list.append( diff --git a/tools/nuget/validate_package.py b/tools/nuget/validate_package.py index a9996b2779d17..e1b9cf0c3ce11 100644 --- a/tools/nuget/validate_package.py +++ b/tools/nuget/validate_package.py @@ -23,6 +23,7 @@ ] gpu_related_header_files = [ "cpu_provider_factory.h", + "tensorrt_provider_factory.h", "onnxruntime_c_api.h", "onnxruntime_cxx_api.h", "onnxruntime_cxx_inline.h",
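
A minimal usage sketch (not part of this revert) of how an application configures the TensorRT EP once OrtTensorRTProviderOptionsV2 no longer carries in-struct defaults: the options object is obtained from CreateTensorRTProviderOptions(), populated with string key/value pairs through UpdateTensorRTProviderOptions(), attached with SessionOptionsAppendExecutionProvider_TensorRT_V2(), and freed with ReleaseTensorRTProviderOptions(), all of which are C API entry points referenced in this patch. The option keys, values, and cache path below are illustrative placeholders, and error handling is abbreviated.

// Sketch only, not part of this patch. Assumes an ONNX Runtime build with the
// TensorRT EP enabled; option values and the cache path are placeholders.
#include <onnxruntime_c_api.h>

int ConfigureTensorRTSessionOptions(OrtSessionOptions* session_options) {
  const OrtApi* api = OrtGetApiBase()->GetApi(ORT_API_VERSION);

  // The V2 options object is only obtained through the C API (see the comment in
  // tensorrt_provider_options.h); callers do not stack-initialize the struct.
  OrtTensorRTProviderOptionsV2* trt_options = nullptr;
  OrtStatus* status = api->CreateTensorRTProviderOptions(&trt_options);
  if (status != nullptr) {
    api->ReleaseStatus(status);  // TensorRT EP not available in this build
    return -1;
  }

  // UpdateTensorRTProviderOptions resets the options and applies the given string
  // key/value pairs (parsed via TensorrtExecutionProviderInfo::FromProviderOptions).
  const char* keys[] = {"device_id", "trt_fp16_enable", "trt_engine_cache_enable", "trt_engine_cache_path"};
  const char* values[] = {"0", "1", "1", "/tmp/trt_cache"};
  status = api->UpdateTensorRTProviderOptions(trt_options, keys, values, 4);
  if (status != nullptr) {
    api->ReleaseStatus(status);
    api->ReleaseTensorRTProviderOptions(trt_options);
    return -1;
  }

  // Register the TensorRT EP with the session options.
  status = api->SessionOptionsAppendExecutionProvider_TensorRT_V2(session_options, trt_options);
  int rc = (status == nullptr) ? 0 : -1;
  if (status != nullptr) {
    api->ReleaseStatus(status);
  }

  // The session options keep their own copy of the settings, so the caller
  // releases the V2 options object (which frees the strings copied into it).
  api->ReleaseTensorRTProviderOptions(trt_options);
  return rc;
}

Inside the ORT tree itself, the revert instead goes back to spelling out every field explicitly, which is what the full field-by-field initializer lists reintroduced in the tests above do.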