From df47d755a798bb8822b7ef0e7c101937b86e094b Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sat, 13 Jan 2024 19:17:19 +0000 Subject: [PATCH 01/25] update --- .../core/session/provider_bridge_ort.cc | 123 +++++++++++++++++- onnxruntime/test/perftest/ort_test_session.cc | 2 + 2 files changed, 120 insertions(+), 5 deletions(-) diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index e2d46012c097b..3ae897ecab1a7 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -88,6 +88,10 @@ using IndexedSubGraph_MetaDef = IndexedSubGraph::MetaDef; #include "core/providers/cann/cann_provider_options.h" #include "core/providers/dnnl/dnnl_provider_options.h" +#ifdef USE_TENSORRT +#include "core/session/onnxruntime_session_options_config_keys.h" +#endif + // The filename extension for a shared library is different per platform #ifdef _WIN32 #define LIBRARY_PREFIX @@ -1365,10 +1369,6 @@ std::shared_ptr DnnlProviderFactoryCreator::Create(in return s_library_dnnl.Get().CreateExecutionProviderFactory(use_arena); } -std::shared_ptr TensorrtProviderFactoryCreator::Create(int device_id) { - return s_library_tensorrt.Get().CreateExecutionProviderFactory(device_id); -} - std::shared_ptr MIGraphXProviderFactoryCreator::Create(int device_id) { return s_library_migraphx.Get().CreateExecutionProviderFactory(device_id); } @@ -1416,6 +1416,95 @@ OrtTensorRTProviderOptionsV2 OrtTensorRTProviderOptionsToOrtTensorRTProviderOpti return trt_options_converted; } +// Get configs from session options that are needed for TensorRT EP +void UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(OrtSessionOptions* session_options, OrtTensorRTProviderOptionsV2* tensorrt_options) { + tensorrt_options->trt_dump_ep_context_model = 1; + std::string embed_mode = (session_options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEmbedMode, "1"); + if ("1" == embed_mode) { + tensorrt_options->trt_ep_context_embed_mode = 1; + } else if ("0" == embed_mode) { + tensorrt_options->trt_ep_context_embed_mode = 0; + } else { + LOGS_DEFAULT(VERBOSE) << "Invalid ep.context_embed_mode: " << embed_mode << " only 0 or 1 allowed. 
Set to 1."; + } + LOGS_DEFAULT(VERBOSE) << "User specified context cache embed mode: " << tensorrt_options->trt_ep_context_embed_mode; +} + +void CopyOrtTensorRTProviderOptionsV2(OrtTensorRTProviderOptionsV2* dst, const OrtTensorRTProviderOptionsV2* src, bool string_copy) { + if (src == nullptr) { + return; + } + auto copy_string_if_needed = [&](std::string s_in) { + if (string_copy) { + char* dest = nullptr; + auto str_size = s_in.size(); + if (str_size == 0) { + return (const char*)nullptr; + } else { + dest = new char[str_size + 1]; +#ifdef _MSC_VER + strncpy_s(dest, str_size + 1, s_in.c_str(), str_size); +#else + strncpy(dest, s_in.c_str(), str_size); +#endif + dest[str_size] = '\0'; + return (const char*)dest; + } + } else { + return s_in.c_str(); + } + }; + + dst->device_id = src->device_id; + dst->has_user_compute_stream = src->has_user_compute_stream; + dst->trt_max_partition_iterations = src->trt_max_partition_iterations; + dst->trt_min_subgraph_size = src->trt_min_subgraph_size; + dst->trt_max_workspace_size = src->trt_max_workspace_size; + dst->trt_fp16_enable = src->trt_fp16_enable; + dst->trt_int8_enable = src->trt_int8_enable; + + dst->trt_int8_calibration_table_name = copy_string_if_needed(src->trt_int8_calibration_table_name); + + dst->trt_int8_use_native_calibration_table = src->trt_int8_use_native_calibration_table; + dst->trt_dla_enable = src->trt_dla_enable; + dst->trt_dla_core = src->trt_dla_core; + dst->trt_dump_subgraphs = src->trt_dump_subgraphs; + dst->trt_engine_cache_enable = src->trt_engine_cache_enable; + + dst->trt_engine_cache_path = copy_string_if_needed(src->trt_engine_cache_path); + dst->trt_timing_cache_path = copy_string_if_needed(src->trt_timing_cache_path); + + dst->trt_engine_decryption_enable = src->trt_engine_decryption_enable; + + dst->trt_engine_decryption_lib_path = copy_string_if_needed(src->trt_engine_decryption_lib_path); + + dst->trt_force_sequential_engine_build = src->trt_force_sequential_engine_build; + dst->trt_context_memory_sharing_enable = src->trt_context_memory_sharing_enable; + dst->trt_layer_norm_fp32_fallback = src->trt_layer_norm_fp32_fallback; + dst->trt_timing_cache_enable = src->trt_timing_cache_enable; + dst->trt_force_timing_cache = src->trt_force_timing_cache; + dst->trt_detailed_build_log = src->trt_detailed_build_log; + dst->trt_build_heuristics_enable = src->trt_build_heuristics_enable; + dst->trt_sparsity_enable = src->trt_sparsity_enable; + dst->trt_builder_optimization_level = src->trt_builder_optimization_level; + dst->trt_auxiliary_streams = src->trt_auxiliary_streams; + + dst->trt_tactic_sources = copy_string_if_needed(src->trt_tactic_sources); + dst->trt_extra_plugin_lib_paths = copy_string_if_needed(src->trt_extra_plugin_lib_paths); + dst->trt_profile_min_shapes = copy_string_if_needed(src->trt_profile_min_shapes); + dst->trt_profile_max_shapes = copy_string_if_needed(src->trt_profile_max_shapes); + dst->trt_profile_opt_shapes = copy_string_if_needed(src->trt_profile_opt_shapes); + + dst->trt_cuda_graph_enable = src->trt_cuda_graph_enable; + dst->trt_dump_ep_context_model = src->trt_dump_ep_context_model; + dst->trt_ep_context_embed_mode = src->trt_ep_context_embed_mode; + dst->trt_ep_context_compute_capability_enable = src->trt_ep_context_compute_capability_enable; +} + +std::shared_ptr TensorrtProviderFactoryCreator::Create(int device_id) { + return s_library_tensorrt.Get().CreateExecutionProviderFactory(device_id); +} + std::shared_ptr TensorrtProviderFactoryCreator::Create(const OrtTensorRTProviderOptions* 
provider_options) { OrtTensorRTProviderOptionsV2 trt_options_converted = onnxruntime::OrtTensorRTProviderOptionsToOrtTensorRTProviderOptionsV2(provider_options); return s_library_tensorrt.Get().CreateExecutionProviderFactory(&trt_options_converted); @@ -1800,7 +1889,31 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_ROCM, _In_ Or ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT_V2, _In_ OrtSessionOptions* options, _In_ const OrtTensorRTProviderOptionsV2* tensorrt_options) { API_IMPL_BEGIN - auto factory = onnxruntime::TensorrtProviderFactoryCreator::Create(tensorrt_options); + + std::shared_ptr factory; + + auto ep_context_cache_enabled_from_provider_options = tensorrt_options->trt_dump_ep_context_model != 0; + auto ep_context_cache_enabled_from_sess_options = (options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEnable, "0") == "1"; + + // If EP context configs are provided in session options, we need to propagate them to provider options. + // However, if provider options already have the EP context configs provided, the configs in session options + // will be ignored since provider options has higher priority than session options. + if (!ep_context_cache_enabled_from_provider_options && ep_context_cache_enabled_from_sess_options) { + // We need to create another provider V2 object since the tensorrt_options points to the "const" object that can't be updated. + OrtTensorRTProviderOptionsV2* new_tensorrt_options = nullptr; + if (OrtApis::CreateTensorRTProviderOptions(&new_tensorrt_options) != nullptr) { + ORT_THROW("Can't create an OrtProviderOptionsV2 object."); + } + auto deleter = [](OrtTensorRTProviderOptionsV2* ptr) { OrtApis::ReleaseTensorRTProviderOptions(ptr); }; + std::unique_ptr rel_trt_options(new_tensorrt_options, deleter); + + onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(options, new_tensorrt_options); + onnxruntime::CopyOrtTensorRTProviderOptionsV2(new_tensorrt_options, tensorrt_options, true); + factory = onnxruntime::TensorrtProviderFactoryCreator::Create(new_tensorrt_options); + } else { + factory = onnxruntime::TensorrtProviderFactoryCreator::Create(tensorrt_options); + } + if (!factory) { return OrtApis::CreateStatus(ORT_FAIL, "OrtSessionOptionsAppendExecutionProvider_TensorRT: Failed to load shared library"); } diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 13082fe69cf48..a3371b390b4d0 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -634,6 +634,8 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. 
\n)"); session_options.AddConfigEntry(kOrtSessionOptionsConfigIntraOpThreadAffinities, performance_test_config.run_config.intra_op_thread_affinities.c_str()); } + session_options.AddConfigEntry(kOrtSessionOptionEpContextEnable, "1"); + if (performance_test_config.run_config.disable_spinning) { fprintf(stdout, "Disabling intra-op thread spinning entirely\n"); session_options.AddConfigEntry(kOrtSessionOptionsConfigAllowIntraOpSpinning, "0"); From 3b8c9baa316a122a95d90aea14f6ff6bf916005e Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 15 Jan 2024 20:23:35 +0000 Subject: [PATCH 02/25] update --- .../tensorrt/tensorrt_provider_options.h | 5 +- .../onnxruntime_session_options_config_keys.h | 7 +- .../core/graph/contrib_ops/contrib_defs.cc | 2 +- .../tensorrt/onnx_ctx_model_helper.cc | 81 ++++++++- .../tensorrt/onnx_ctx_model_helper.h | 5 +- .../tensorrt/tensorrt_execution_provider.cc | 30 +++- .../tensorrt/tensorrt_execution_provider.h | 4 +- .../tensorrt_execution_provider_info.cc | 8 +- .../tensorrt_execution_provider_info.h | 3 +- .../tensorrt/tensorrt_provider_factory.cc | 8 +- .../tensorrt_provider_factory_creator.h | 2 + .../core/session/provider_bridge_ort.cc | 155 +++++++----------- 12 files changed, 191 insertions(+), 119 deletions(-) diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h index 60196d0c80cbb..b1a751c3468e4 100644 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h @@ -11,6 +11,8 @@ /// User can only get the instance of OrtTensorRTProviderOptionsV2 via CreateTensorRTProviderOptions. /// struct OrtTensorRTProviderOptionsV2 { + OrtTensorRTProviderOptionsV2& operator=(const OrtTensorRTProviderOptionsV2& other); // copy assignment operator + int device_id{0}; // cuda device id. int has_user_compute_stream{0}; // indicator of user specified CUDA compute stream. void* user_compute_stream{nullptr}; // user specified CUDA compute stream. @@ -47,7 +49,8 @@ struct OrtTensorRTProviderOptionsV2 { const char* trt_profile_opt_shapes{nullptr}; // Specify the range of the input shapes to build the engine with int trt_cuda_graph_enable{0}; // Enable CUDA graph in ORT TRT int trt_dump_ep_context_model{0}; // Dump EP context node model + const char* trt_ep_context_file_path{nullptr}; // Specify file name to dump EP context node model. int trt_ep_context_embed_mode{0}; // Specify EP context embed mode. Default 0 = context is engine cache path, 1 = context is engine binary data - int trt_ep_context_compute_capability_enable{1}; // Add GPU compute capability as an EP context node's attribute + int trt_ep_context_compute_capability_enable{0}; // Add GPU compute capability as an EP context node's attribute const char* trt_engine_cache_prefix{nullptr}; // specify engine cache prefix }; diff --git a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h index df79cb6e5b21b..104e024c43405 100644 --- a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h +++ b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h @@ -249,4 +249,9 @@ static const char* const kOrtSessionOptionEpContextFilePath = "ep.context_file_p // Flag to specify whether to dump the EP context into the Onnx model. 
// "0": dump the EP context into separate file, keep the file name in the Onnx model. // "1": dump the EP context into the Onnx model. (default). -static const char* const kOrtSessionOptionEpContextEmbedMode = "ep.context_embed_mode"; \ No newline at end of file +static const char* const kOrtSessionOptionEpContextEmbedMode = "ep.context_embed_mode"; + +// Enable to check whether the hardware architecture matches the EP context node's "hardware_architecture" attribute. +// "0": disable. (default) +// "1": enable. +static const char* const kOrtSessionOptionEpContextHardwareArchitectureEnable = "ep.context_hardware_architecture_enable"; \ No newline at end of file diff --git a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc index 982e8fd834b76..68ded671d7ac8 100644 --- a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc +++ b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc @@ -3232,7 +3232,7 @@ void RegisterContribSchemas() { OPTIONAL_VALUE) .Attr( "hardware_architecture", - "(Optional) Hardware architecture.", + "(Optional) Hardware architecture for running this EP context node.", AttributeProto::STRING, OPTIONAL_VALUE) .Attr( diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc index 4d8ba6a0891e3..661bfd6603879 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc @@ -137,15 +137,90 @@ ONNX_NAMESPACE::ModelProto* CreateCtxNodeModel(const GraphViewer& graph_viewer, return model_proto.release(); } +/* + * Get "EP context node" model path + * + * + * If ep_context_file_path is provided: + * - If ep_context_file_path is a file: + * - If it's a file name without any path associated with it, return "engine_cache_path/ep_context_file_path". + - If it's a file name with path associated with it, return "ep_context_file_path". + * - If ep_context_file_path is a directory, return "ep_context_file_path/original_model_name_ctx.onnx". + * If ep_context_file_path is not provided: + * - Return "engine_cache_path/original_model_name_ctx.onnx". 
+ * + * + * Example 1: + * ep_context_file_path = "/home/user/ep_context_model_foler" + * engine_cache_path = "trt_engine.engine" + * original_model_path = "model.onnx" + * => return "/home/user/ep_context_model_folder/model_ctx.onnx" + * + * Example 2: + * ep_context_file_path = "my_ctx_model.onnx" + * engine_cache_path = "/home/user/cache_folder/trt_engine.engine" + * original_model_path = "model.onnx" + * => return "/home/user/cache_folder/my_ctx_model.onnx" + * + * Example 3: + * ep_context_file_path = "/home/user2/ep_context_model_foler/my_ctx_model.onnx" + * engine_cache_path = "trt_engine.engine" + * original_model_path = "model.onnx" + * => return "/home/user2/ep_context_model_foler/my_ctx_model.onnx" + * + * Example 4: + * ep_context_file_path = "" + * engine_cache_path = "/home/user3/cache_folder/trt_engine.engine" + * original_model_path = "model.onnx" + * => return "/home/user3/cache_folder/model_ctx.onnx" + * + */ +std::string GetCtxNodeModelPath(const std::string& ep_context_file_path, + const std::string& engine_cache_path, + const std::string& original_model_path) { + std::string ctx_model_path; + + if (!ep_context_file_path.empty() && !std::filesystem::is_directory(ep_context_file_path)) { + std::filesystem::path ctx_model_file_path = ep_context_file_path; + if (ctx_model_file_path.filename().string() == ep_context_file_path) { + std::filesystem::path cache_path = engine_cache_path; + if (cache_path.has_parent_path()) { + ctx_model_path = cache_path.parent_path().append(ep_context_file_path).string(); + } else { + ctx_model_path = ep_context_file_path; + } + } else { + ctx_model_path = ep_context_file_path; + } + } else { + std::filesystem::path model_path = original_model_path; + std::filesystem::path model_name_stem = model_path.stem(); // model_name.onnx -> model_name + std::string ctx_model_name = model_name_stem.string() + "_ctx.onnx"; + + if (std::filesystem::is_directory(ep_context_file_path)) { + std::filesystem::path model_directory = ep_context_file_path; + ctx_model_path = model_directory.append(ctx_model_name).string(); + } else { + std::filesystem::path cache_path = engine_cache_path; + if (cache_path.has_parent_path()) { + ctx_model_path = cache_path.parent_path().append(ctx_model_name).string(); + } else { + ctx_model_path = ctx_model_name; + } + } + } + return ctx_model_path; +} + /* * Dump "EP context node" model * */ void DumpCtxNodeModel(ONNX_NAMESPACE::ModelProto* model_proto, - const std::string engine_cache_path) { - std::fstream dump(engine_cache_path + "_wrapper.onnx", std::ios::out | std::ios::trunc | std::ios::binary); + const std::string& ctx_model_path) { + std::fstream dump(ctx_model_path, std::ios::out | std::ios::trunc | std::ios::binary); model_proto->SerializeToOstream(dump); - LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Serialized " + engine_cache_path + "_wrapper.onnx"; + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Dumped " + ctx_model_path; } Status TensorRTCacheModelHandler::GetEpContextFromGraph(const GraphViewer& graph_viewer) { diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h index ab6ea733adfa1..50f235740932c 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h @@ -28,8 +28,11 @@ ONNX_NAMESPACE::ModelProto* CreateCtxNodeModel(const GraphViewer& graph_viewer, bool compute_capability_enable, std::string compute_capability, const logging::Logger* logger); +std::string 
GetCtxNodeModelPath(const std::string& ep_context_file_path, + const std::string& engine_cache_path, + const std::string& original_model_path); void DumpCtxNodeModel(ONNX_NAMESPACE::ModelProto* model_proto, - const std::string engine_cache_path); + const std::string& ctx_model_path); void UpdateCtxNodeModelEngineContext(ONNX_NAMESPACE::ModelProto* model_proto, char* engine_data, size_t size); diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index aa02d8384afa6..88c7cce140ae3 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -1381,6 +1381,7 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv profile_opt_shapes = info.profile_opt_shapes; cuda_graph_enable_ = info.cuda_graph_enable; dump_ep_context_model_ = info.dump_ep_context_model; + ep_context_file_path_ = info.ep_context_file_path; ep_context_embed_mode_ = info.ep_context_embed_mode; ep_context_compute_capability_enable_ = info.ep_context_compute_capability_enable; } else { @@ -1543,6 +1544,11 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv dump_ep_context_model_ = (std::stoi(dump_ep_context_model_env) == 0 ? false : true); } + const std::string ep_context_file_path_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kEpContextComputeCapabilityEnable); + if (!ep_context_file_path_env.empty()) { + ep_context_file_path_ = ep_context_file_path_env; + } + const std::string ep_context_embed_mode_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kEpContextEmbedMode); if (!ep_context_embed_mode_env.empty()) { ep_context_embed_mode_ = std::stoi(ep_context_embed_mode_env); @@ -1580,7 +1586,7 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv dla_core_ = 0; } - if (engine_cache_enable_ || int8_enable_ || timing_cache_enable_ || !cache_prefix_.empty()) { + if (engine_cache_enable_ || int8_enable_ || timing_cache_enable_) { if (!cache_path_.empty() && !fs::is_directory(cache_path_)) { if (!fs::create_directory(cache_path_)) { throw std::runtime_error("Failed to create directory " + cache_path_); @@ -1692,6 +1698,10 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv << ", trt_profile_max_shapes: " << profile_max_shapes << ", trt_profile_opt_shapes: " << profile_opt_shapes << ", trt_cuda_graph_enable: " << cuda_graph_enable_ + << ", trt_dump_ep_context_model: " << dump_ep_context_model_ + << ", trt_ep_context_file_path: " << ep_context_file_path_ + << ", trt_ep_context_embed_mode: " << ep_context_embed_mode_ + << ", trt_ep_context_compute_capability_enable: " << ep_context_compute_capability_enable_ << ", trt_cache_prefix: " << cache_prefix_; } @@ -2831,10 +2841,8 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView std::unique_ptr trt_engine; std::unique_ptr trt_context; - // Name the engine cache based on GPU compute capacity and reduce the chance of loading an incompatible cache - // Note: Engine cache generated on a GPU with large memory might not be loadable on a GPU with smaller memory, even if they share the same compute capacity - std::string cache_suffix = ""; std::string cache_path = ""; + std::string cache_suffix = ""; // Customize cache prefix if assigned if (!cache_prefix_.empty()) { // Generate cache suffix in case user would like to customize cache prefix @@ 
-2843,11 +2851,19 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView } else { cache_path = GetCachePath(cache_path_, trt_node_name_with_precision); } + + // Name the engine cache based on GPU compute capacity and reduce the chance of loading an incompatible cache + // Note: Engine cache generated on a GPU with large memory might not be loadable on a GPU with smaller memory, even if they share the same compute capacity const std::string cache_path_prefix = cache_path + "_sm" + compute_capability_; const std::string engine_cache_path = cache_path_prefix + ".engine"; const std::string encrypted_engine_cache_path = engine_cache_path + ".encrypted"; const std::string profile_cache_path = cache_path_prefix + ".profile"; + // Generate file name for dumping ep context model + if (dump_ep_context_model_ && ctx_model_path_.empty()) { + ctx_model_path_ = GetCtxNodeModelPath(ep_context_file_path_, engine_cache_path, model_path_); + } + if (!has_dynamic_shape) { std::string timing_cache_path = ""; bool engine_update = false; @@ -2992,7 +3008,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView ep_context_compute_capability_enable_, compute_capability_, GetLogger())}; - DumpCtxNodeModel(model_proto.get(), cache_path_prefix); + DumpCtxNodeModel(model_proto.get(), ctx_model_path_); } } } @@ -3061,7 +3077,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView compute_capability_, GetLogger())); if (ep_context_embed_mode_ == 0) { - DumpCtxNodeModel(model_proto_.get(), cache_path_prefix); + DumpCtxNodeModel(model_proto_.get(), ctx_model_path_); } } @@ -3382,7 +3398,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView // dump ep context model if (dump_ep_context_model_ && ep_context_embed_mode_) { UpdateCtxNodeModelEngineContext(model_proto_.get(), reinterpret_cast(serialized_engine->data()), serialized_engine->size()); - DumpCtxNodeModel(model_proto_.get(), cache_path_prefix); + DumpCtxNodeModel(model_proto_.get(), ctx_model_path_); } context_update = true; } diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h index 401a8da119ac2..7216a6da6839c 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h @@ -293,6 +293,7 @@ class TensorrtExecutionProvider : public IExecutionProvider { bool force_timing_cache_match_ = false; bool detailed_build_log_ = false; bool cuda_graph_enable_ = false; + std::string ctx_model_path_; std::string cache_prefix_; // The OrtAllocator object will be get during ep compute time @@ -301,8 +302,9 @@ class TensorrtExecutionProvider : public IExecutionProvider { // For create/dump EP context node model bool dump_ep_context_model_ = false; + std::string ep_context_file_path_; int ep_context_embed_mode_ = 0; - bool ep_context_compute_capability_enable_ = true; + bool ep_context_compute_capability_enable_ = false; std::unique_ptr model_proto_ = ONNX_NAMESPACE::ModelProto::Create(); std::unordered_set control_flow_op_set_ = {"If", "Loop", "Scan"}; diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc index 28f6e1720f615..1143af60486ea 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc +++ 
b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc @@ -47,8 +47,9 @@ constexpr const char* kProfilesMinShapes = "trt_profile_min_shapes"; constexpr const char* kProfilesMaxShapes = "trt_profile_max_shapes"; constexpr const char* kProfilesOptShapes = "trt_profile_opt_shapes"; constexpr const char* kCudaGraphEnable = "trt_cuda_graph_enable"; -constexpr const char* kDumpEpContextModel = "trt_dump_ep_context_model"; constexpr const char* kEpContextEmbedMode = "trt_ep_context_embed_mode"; +constexpr const char* kEpContextFilePath = "trt_ep_context_file_path"; +constexpr const char* kDumpEpContextModel = "trt_dump_ep_context_model"; constexpr const char* kEpContextComputeCapabilityEnable = "trt_ep_context_compute_capability_enable"; } // namespace provider_option_names } // namespace tensorrt @@ -103,6 +104,7 @@ TensorrtExecutionProviderInfo TensorrtExecutionProviderInfo::FromProviderOptions .AddAssignmentToReference(tensorrt::provider_option_names::kProfilesOptShapes, info.profile_opt_shapes) .AddAssignmentToReference(tensorrt::provider_option_names::kCudaGraphEnable, info.cuda_graph_enable) .AddAssignmentToReference(tensorrt::provider_option_names::kDumpEpContextModel, info.dump_ep_context_model) + .AddAssignmentToReference(tensorrt::provider_option_names::kEpContextFilePath, info.ep_context_file_path) .AddAssignmentToReference(tensorrt::provider_option_names::kEpContextEmbedMode, info.ep_context_embed_mode) .AddAssignmentToReference(tensorrt::provider_option_names::kEpContextComputeCapabilityEnable, info.ep_context_compute_capability_enable) .Parse(options)); // add new provider option here. @@ -148,6 +150,7 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const TensorrtE {tensorrt::provider_option_names::kProfilesOptShapes, MakeStringWithClassicLocale(info.profile_opt_shapes)}, {tensorrt::provider_option_names::kCudaGraphEnable, MakeStringWithClassicLocale(info.cuda_graph_enable)}, {tensorrt::provider_option_names::kDumpEpContextModel, MakeStringWithClassicLocale(info.dump_ep_context_model)}, + {tensorrt::provider_option_names::kEpContextFilePath, MakeStringWithClassicLocale(info.ep_context_file_path)}, {tensorrt::provider_option_names::kEpContextEmbedMode, MakeStringWithClassicLocale(info.ep_context_embed_mode)}, {tensorrt::provider_option_names::kEpContextComputeCapabilityEnable, MakeStringWithClassicLocale(info.ep_context_compute_capability_enable)}, }; @@ -166,6 +169,7 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const OrtTensor const std::string kProfilesMinShapes_ = empty_if_null(info.trt_profile_min_shapes); const std::string kProfilesMaxShapes_ = empty_if_null(info.trt_profile_max_shapes); const std::string kProfilesOptShapes_ = empty_if_null(info.trt_profile_opt_shapes); + const std::string kEpContextFilePath_ = empty_if_null(info.trt_ep_context_file_path); const ProviderOptions options{ {tensorrt::provider_option_names::kDeviceId, MakeStringWithClassicLocale(info.device_id)}, @@ -202,6 +206,7 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const OrtTensor {tensorrt::provider_option_names::kProfilesMaxShapes, kProfilesMaxShapes_}, {tensorrt::provider_option_names::kProfilesOptShapes, kProfilesOptShapes_}, {tensorrt::provider_option_names::kCudaGraphEnable, MakeStringWithClassicLocale(info.trt_cuda_graph_enable)}, + {tensorrt::provider_option_names::kEpContextFilePath, kEpContextFilePath_}, {tensorrt::provider_option_names::kDumpEpContextModel, 
MakeStringWithClassicLocale(info.trt_dump_ep_context_model)}, {tensorrt::provider_option_names::kEpContextEmbedMode, MakeStringWithClassicLocale(info.trt_ep_context_embed_mode)}, {tensorrt::provider_option_names::kEpContextComputeCapabilityEnable, MakeStringWithClassicLocale(info.trt_ep_context_compute_capability_enable)}, @@ -299,6 +304,7 @@ void TensorrtExecutionProviderInfo::UpdateProviderOptions(void* provider_options trt_provider_options_v2.trt_cuda_graph_enable = internal_options.cuda_graph_enable; trt_provider_options_v2.trt_dump_ep_context_model = internal_options.dump_ep_context_model; trt_provider_options_v2.trt_ep_context_embed_mode = internal_options.ep_context_embed_mode; + trt_provider_options_v2.trt_ep_context_file_path = copy_string_if_needed(internal_options.ep_context_file_path); trt_provider_options_v2.trt_ep_context_compute_capability_enable = internal_options.ep_context_compute_capability_enable; } } // namespace onnxruntime diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h index a133ef45affe8..2518bdd5337a0 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h @@ -52,8 +52,9 @@ struct TensorrtExecutionProviderInfo { std::string profile_opt_shapes{""}; bool cuda_graph_enable{false}; bool dump_ep_context_model{false}; + std::string ep_context_file_path{""}; int ep_context_embed_mode{0}; - bool ep_context_compute_capability_enable{1}; + bool ep_context_compute_capability_enable{0}; std::string engine_cache_prefix{""}; static TensorrtExecutionProviderInfo FromProviderOptions(const ProviderOptions& options); diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc index 62f124afbd1e5..722f24c3fd6ae 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc @@ -61,13 +61,6 @@ std::unique_ptr TensorrtProviderFactory::CreateProvider() { return std::make_unique(info_); } -std::shared_ptr TensorrtProviderFactoryCreator::Create(int device_id) { - TensorrtExecutionProviderInfo info; - info.device_id = device_id; - info.has_trt_options = false; - return std::make_shared(info); -} - struct Tensorrt_Provider : Provider { void* GetInfo() override { return &g_info; } std::shared_ptr CreateExecutionProviderFactory(int device_id) override { @@ -117,6 +110,7 @@ struct Tensorrt_Provider : Provider { info.profile_opt_shapes = options.trt_profile_opt_shapes == nullptr ? "" : options.trt_profile_opt_shapes; info.cuda_graph_enable = options.trt_cuda_graph_enable != 0; info.dump_ep_context_model = options.trt_dump_ep_context_model != 0; + info.ep_context_file_path = options.trt_ep_context_file_path == nullptr ? "" : options.trt_ep_context_file_path; info.ep_context_embed_mode = options.trt_ep_context_embed_mode; info.ep_context_compute_capability_enable = options.trt_ep_context_compute_capability_enable != 0; info.engine_cache_prefix = options.trt_engine_cache_prefix == nullptr ? 
"" : options.trt_engine_cache_prefix; diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory_creator.h b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory_creator.h index d905003fb7cc1..96917c8fb8e88 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory_creator.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory_creator.h @@ -15,6 +15,8 @@ namespace onnxruntime { struct TensorrtProviderFactoryCreator { static std::shared_ptr Create(int device_id); static std::shared_ptr Create(const OrtTensorRTProviderOptions* provider_options); + static std::shared_ptr Create(void* session_options, const OrtTensorRTProviderOptions* provider_options); static std::shared_ptr Create(const OrtTensorRTProviderOptionsV2* provider_options); + static std::shared_ptr Create(void* session_options, const OrtTensorRTProviderOptionsV2* provider_options); }; } // namespace onnxruntime diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 2f5ca1de62d66..784dd98fd952f 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1419,94 +1419,41 @@ OrtTensorRTProviderOptionsV2 OrtTensorRTProviderOptionsToOrtTensorRTProviderOpti trt_options_converted.trt_profile_max_shapes = ""; trt_options_converted.trt_profile_opt_shapes = ""; trt_options_converted.trt_cuda_graph_enable = 0; + trt_options_converted.trt_dump_ep_context_model = 0; + trt_options_converted.trt_ep_context_file_path = ""; + trt_options_converted.trt_ep_context_embed_mode = 0; + trt_options_converted.trt_ep_context_compute_capability_enable = 0; trt_options_converted.trt_engine_cache_prefix = ""; return trt_options_converted; } -// Get configs from session options that are needed for TensorRT EP +// Apply configs from session options to TensorRT provider options V2 that are needed for TensorRT EP. +// For example, EP context configs. void UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(OrtSessionOptions* session_options, OrtTensorRTProviderOptionsV2* tensorrt_options) { - tensorrt_options->trt_dump_ep_context_model = 1; - std::string embed_mode = (session_options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEmbedMode, "1"); - if ("1" == embed_mode) { - tensorrt_options->trt_ep_context_embed_mode = 1; - } else if ("0" == embed_mode) { - tensorrt_options->trt_ep_context_embed_mode = 0; - } else { - LOGS_DEFAULT(VERBOSE) << "Invalid ep.context_embed_mode: " << embed_mode << " only 0 or 1 allowed. 
Set to 1."; - } - LOGS_DEFAULT(VERBOSE) << "User specified context cache embed mode: " << tensorrt_options->trt_ep_context_embed_mode; -} - -void CopyOrtTensorRTProviderOptionsV2(OrtTensorRTProviderOptionsV2* dst, const OrtTensorRTProviderOptionsV2* src, bool string_copy) { - if (src == nullptr) { - return; - } - auto copy_string_if_needed = [&](std::string s_in) { - if (string_copy) { - char* dest = nullptr; - auto str_size = s_in.size(); - if (str_size == 0) { - return (const char*)nullptr; - } else { - dest = new char[str_size + 1]; -#ifdef _MSC_VER - strncpy_s(dest, str_size + 1, s_in.c_str(), str_size); -#else - strncpy(dest, s_in.c_str(), str_size); -#endif - dest[str_size] = '\0'; - return (const char*)dest; - } + if (session_options) { + auto context_cache_enabled = (session_options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEnable, "0") != "0"; + tensorrt_options->trt_dump_ep_context_model = context_cache_enabled; + LOGS_DEFAULT(VERBOSE) << "Context cache enable: " << context_cache_enabled; + + auto context_cache_path = (session_options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextFilePath, ""); + tensorrt_options->trt_ep_context_file_path = context_cache_path.c_str(); + LOGS_DEFAULT(VERBOSE) << "User specified context cache path: " << tensorrt_options->trt_ep_context_file_path; + + auto embed_mode = (session_options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEmbedMode, "1"); + if ("1" == embed_mode) { + tensorrt_options->trt_ep_context_embed_mode = 1; + } else if ("0" == embed_mode) { + tensorrt_options->trt_ep_context_embed_mode = 0; } else { - return s_in.c_str(); + LOGS_DEFAULT(VERBOSE) << "Invalid ep.context_embed_mode: " << embed_mode << " only 0 or 1 allowed. Set to 1."; } - }; - - dst->device_id = src->device_id; - dst->has_user_compute_stream = src->has_user_compute_stream; - dst->trt_max_partition_iterations = src->trt_max_partition_iterations; - dst->trt_min_subgraph_size = src->trt_min_subgraph_size; - dst->trt_max_workspace_size = src->trt_max_workspace_size; - dst->trt_fp16_enable = src->trt_fp16_enable; - dst->trt_int8_enable = src->trt_int8_enable; - - dst->trt_int8_calibration_table_name = copy_string_if_needed(src->trt_int8_calibration_table_name); - - dst->trt_int8_use_native_calibration_table = src->trt_int8_use_native_calibration_table; - dst->trt_dla_enable = src->trt_dla_enable; - dst->trt_dla_core = src->trt_dla_core; - dst->trt_dump_subgraphs = src->trt_dump_subgraphs; - dst->trt_engine_cache_enable = src->trt_engine_cache_enable; - - dst->trt_engine_cache_path = copy_string_if_needed(src->trt_engine_cache_path); - dst->trt_timing_cache_path = copy_string_if_needed(src->trt_timing_cache_path); - - dst->trt_engine_decryption_enable = src->trt_engine_decryption_enable; - - dst->trt_engine_decryption_lib_path = copy_string_if_needed(src->trt_engine_decryption_lib_path); - - dst->trt_force_sequential_engine_build = src->trt_force_sequential_engine_build; - dst->trt_context_memory_sharing_enable = src->trt_context_memory_sharing_enable; - dst->trt_layer_norm_fp32_fallback = src->trt_layer_norm_fp32_fallback; - dst->trt_timing_cache_enable = src->trt_timing_cache_enable; - dst->trt_force_timing_cache = src->trt_force_timing_cache; - dst->trt_detailed_build_log = src->trt_detailed_build_log; - dst->trt_build_heuristics_enable = src->trt_build_heuristics_enable; - dst->trt_sparsity_enable = src->trt_sparsity_enable; - dst->trt_builder_optimization_level = src->trt_builder_optimization_level; 
- dst->trt_auxiliary_streams = src->trt_auxiliary_streams; - - dst->trt_tactic_sources = copy_string_if_needed(src->trt_tactic_sources); - dst->trt_extra_plugin_lib_paths = copy_string_if_needed(src->trt_extra_plugin_lib_paths); - dst->trt_profile_min_shapes = copy_string_if_needed(src->trt_profile_min_shapes); - dst->trt_profile_max_shapes = copy_string_if_needed(src->trt_profile_max_shapes); - dst->trt_profile_opt_shapes = copy_string_if_needed(src->trt_profile_opt_shapes); + LOGS_DEFAULT(VERBOSE) << "User specified context cache embed mode: " << tensorrt_options->trt_ep_context_embed_mode; - dst->trt_cuda_graph_enable = src->trt_cuda_graph_enable; - dst->trt_dump_ep_context_model = src->trt_dump_ep_context_model; - dst->trt_ep_context_embed_mode = src->trt_ep_context_embed_mode; - dst->trt_ep_context_compute_capability_enable = src->trt_ep_context_compute_capability_enable; + auto context_hardware_arch_enable = (session_options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextHardwareArchitectureEnable, "0") != "0"; + tensorrt_options->trt_ep_context_compute_capability_enable = context_hardware_arch_enable; + LOGS_DEFAULT(VERBOSE) << "User specified context hardware architecture enable: " << tensorrt_options->trt_ep_context_compute_capability_enable; + } } std::shared_ptr TensorrtProviderFactoryCreator::Create(int device_id) { @@ -1518,10 +1465,26 @@ std::shared_ptr TensorrtProviderFactoryCreator::Creat return s_library_tensorrt.Get().CreateExecutionProviderFactory(&trt_options_converted); } +std::shared_ptr TensorrtProviderFactoryCreator::Create(void* session_options, const OrtTensorRTProviderOptions* provider_options) { + OrtTensorRTProviderOptionsV2 trt_options_converted = onnxruntime::OrtTensorRTProviderOptionsToOrtTensorRTProviderOptionsV2(provider_options); + onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(reinterpret_cast(session_options), &trt_options_converted); + return s_library_tensorrt.Get().CreateExecutionProviderFactory(&trt_options_converted); +} + std::shared_ptr TensorrtProviderFactoryCreator::Create(const OrtTensorRTProviderOptionsV2* provider_options) { return s_library_tensorrt.Get().CreateExecutionProviderFactory(provider_options); } +std::shared_ptr TensorrtProviderFactoryCreator::Create(void* session_options, const OrtTensorRTProviderOptionsV2* provider_options) { + // We need to create a new provider options V2 object and copy from provider_options, due to the "const" object pointed by provider_options can't be modified. + // + // Note: No need to worry about tensorrt_options being a local variable, CreateExecutionProviderFactory() in TRT EP will + // create a factory object that copies any provider options from tensorrt_options including "const char*" provider options. 
+ OrtTensorRTProviderOptionsV2 tensorrt_options = *provider_options; // copy and assign from provider_options + onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(reinterpret_cast(session_options), &tensorrt_options); + return s_library_tensorrt.Get().CreateExecutionProviderFactory(&tensorrt_options); +} + std::shared_ptr MIGraphXProviderFactoryCreator::Create(const OrtMIGraphXProviderOptions* provider_options) { return s_library_migraphx.Get().CreateExecutionProviderFactory(provider_options); } @@ -1797,7 +1760,18 @@ ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_MIGraphX, _In_ OrtS ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT, _In_ OrtSessionOptions* options, _In_ const OrtTensorRTProviderOptions* tensorrt_options) { API_IMPL_BEGIN - auto factory = onnxruntime::TensorrtProviderFactoryCreator::Create(tensorrt_options); + + std::shared_ptr factory; + + auto ep_context_cache_enabled_from_sess_options = (options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEnable, "0") != "0"; + + // If EP context configs are provided in session options, we need to propagate them to provider options + if (ep_context_cache_enabled_from_sess_options) { + factory = onnxruntime::TensorrtProviderFactoryCreator::Create(options, tensorrt_options); + } else { + factory = onnxruntime::TensorrtProviderFactoryCreator::Create(tensorrt_options); + } + if (!factory) { return OrtApis::CreateStatus(ORT_FAIL, "SessionOptionsAppendExecutionProvider_Tensorrt: Failed to load shared library"); } @@ -1938,23 +1912,13 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT_V2, std::shared_ptr factory; auto ep_context_cache_enabled_from_provider_options = tensorrt_options->trt_dump_ep_context_model != 0; - auto ep_context_cache_enabled_from_sess_options = (options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEnable, "0") == "1"; + auto ep_context_cache_enabled_from_sess_options = (options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEnable, "0") != "0"; - // If EP context configs are provided in session options, we need to propagate them to provider options. - // However, if provider options already have the EP context configs provided, the configs in session options - // will be ignored since provider options has higher priority than session options. + // If EP context configs are provided in session options, we need to propagate them to provider options. However, + // if provider options already have the EP context configs provided, the configs in session options will be ignored + // since provider options has higher priority than session options. if (!ep_context_cache_enabled_from_provider_options && ep_context_cache_enabled_from_sess_options) { - // We need to create another provider V2 object since the tensorrt_options points to the "const" object that can't be updated. 
- OrtTensorRTProviderOptionsV2* new_tensorrt_options = nullptr; - if (OrtApis::CreateTensorRTProviderOptions(&new_tensorrt_options) != nullptr) { - ORT_THROW("Can't create an OrtProviderOptionsV2 object."); - } - auto deleter = [](OrtTensorRTProviderOptionsV2* ptr) { OrtApis::ReleaseTensorRTProviderOptions(ptr); }; - std::unique_ptr rel_trt_options(new_tensorrt_options, deleter); - - onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(options, new_tensorrt_options); - onnxruntime::CopyOrtTensorRTProviderOptionsV2(new_tensorrt_options, tensorrt_options, true); - factory = onnxruntime::TensorrtProviderFactoryCreator::Create(new_tensorrt_options); + factory = onnxruntime::TensorrtProviderFactoryCreator::Create(options, tensorrt_options); } else { factory = onnxruntime::TensorrtProviderFactoryCreator::Create(tensorrt_options); } @@ -2104,6 +2068,7 @@ ORT_API(void, OrtApis::ReleaseTensorRTProviderOptions, _Frees_ptr_opt_ OrtTensor delete[] ptr->trt_profile_min_shapes; delete[] ptr->trt_profile_max_shapes; delete[] ptr->trt_profile_opt_shapes; + delete[] ptr->trt_ep_context_file_path; } std::unique_ptr p(ptr); From 55eca2e529daac9ea45facee795162da2bca3ae4 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 15 Jan 2024 21:30:54 +0000 Subject: [PATCH 03/25] update --- onnxruntime/core/graph/contrib_ops/contrib_defs.cc | 2 +- onnxruntime/python/onnxruntime_pybind_state.cc | 9 ++++++++- onnxruntime/test/perftest/ort_test_session.cc | 4 ++-- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc index 68ded671d7ac8..982e8fd834b76 100644 --- a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc +++ b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc @@ -3232,7 +3232,7 @@ void RegisterContribSchemas() { OPTIONAL_VALUE) .Attr( "hardware_architecture", - "(Optional) Hardware architecture for running this EP context node.", + "(Optional) Hardware architecture.", AttributeProto::STRING, OPTIONAL_VALUE) .Attr( diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index d2cd6140b838e..9ce3a9a5fa07d 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -475,7 +475,7 @@ std::unique_ptr CreateExecutionProviderInstance( // So we need these std::string variables defined here as they will be kept alive for the lifetime of TRT EP and we can still access them from OrtTensorRTProviderOptionsV2 instance. // (The reason is string copy is involved, for example params.trt_engine_cache_path = cache_path.c_str() and those std::string variable is referenced by OrtTensorRTProviderOptionsV2 instance // and TRT EP instance, so it won't be released.) - std::string calibration_table, cache_path, cache_prefix, timing_cache_path, lib_path, trt_tactic_sources, trt_extra_plugin_lib_paths, min_profile, max_profile, opt_profile; + std::string calibration_table, cache_path, cache_prefix, timing_cache_path, lib_path, trt_tactic_sources, trt_extra_plugin_lib_paths, min_profile, max_profile, opt_profile, ep_context_file_path; auto it = provider_options_map.find(type); if (it != provider_options_map.end()) { OrtTensorRTProviderOptionsV2 params; @@ -728,6 +728,13 @@ std::unique_ptr CreateExecutionProviderInstance( } else { ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dump_ep_context_model' should be 'True' or 'False'. 
Default value is 'False'.\n"); } + } else if (option.first == "trt_ep_context_file_path") { + if (!option.second.empty()) { + ep_context_file_path = option.second; + params.trt_ep_context_file_path = ep_context_file_path.c_str(); + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_ep_context_file_path' should be a string.\n"); + } } else if (option.first == "trt_ep_context_embed_mode") { if (!option.second.empty()) { params.trt_ep_context_embed_mode = std::stoi(option.second); diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 24bd550923b95..babd0786c99bb 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -46,6 +46,8 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device const TestModelInfo& m) : rand_engine_(rd()), input_names_(m.GetInputCount()), input_names_str_(m.GetInputCount()), input_length_(m.GetInputCount()) { Ort::SessionOptions session_options; + session_options.AddConfigEntry(kOrtSessionOptionEpContextEnable, "1"); + session_options.AddConfigEntry(kOrtSessionOptionEpContextFilePath, "E:\\"); provider_name_ = performance_test_config.machine_config.provider_type_name; if (provider_name_ == onnxruntime::kDnnlExecutionProvider) { #ifdef USE_DNNL @@ -634,8 +636,6 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); session_options.AddConfigEntry(kOrtSessionOptionsConfigIntraOpThreadAffinities, performance_test_config.run_config.intra_op_thread_affinities.c_str()); } - session_options.AddConfigEntry(kOrtSessionOptionEpContextEnable, "1"); - if (performance_test_config.run_config.disable_spinning) { fprintf(stdout, "Disabling intra-op thread spinning entirely\n"); session_options.AddConfigEntry(kOrtSessionOptionsConfigAllowIntraOpSpinning, "0"); From 7082994150822233fc517471fcf09f854eb3268e Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 15 Jan 2024 21:31:51 +0000 Subject: [PATCH 04/25] update --- onnxruntime/test/perftest/ort_test_session.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index babd0786c99bb..f8a012af5bb13 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -46,8 +46,6 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device const TestModelInfo& m) : rand_engine_(rd()), input_names_(m.GetInputCount()), input_names_str_(m.GetInputCount()), input_length_(m.GetInputCount()) { Ort::SessionOptions session_options; - session_options.AddConfigEntry(kOrtSessionOptionEpContextEnable, "1"); - session_options.AddConfigEntry(kOrtSessionOptionEpContextFilePath, "E:\\"); provider_name_ = performance_test_config.machine_config.provider_type_name; if (provider_name_ == onnxruntime::kDnnlExecutionProvider) { #ifdef USE_DNNL From 7b7a68298141ce83a3b1b28622974bb959331465 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 15 Jan 2024 22:44:00 +0000 Subject: [PATCH 05/25] add warning message --- .../core/providers/tensorrt/tensorrt_provider_options.h | 2 +- .../core/session/onnxruntime_session_options_config_keys.h | 3 ++- .../core/providers/tensorrt/onnx_ctx_model_helper.cc | 5 ++++- .../core/providers/tensorrt/onnx_ctx_model_helper.h | 7 ++++++- .../core/providers/tensorrt/tensorrt_execution_provider.cc | 2 +- 5 files changed, 14 insertions(+), 5 deletions(-) diff --git 
a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h
index b1a751c3468e4..2443fde022415 100644
--- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h
+++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h
@@ -51,6 +51,6 @@ struct OrtTensorRTProviderOptionsV2 {
   int trt_dump_ep_context_model{0};  // Dump EP context node model
   const char* trt_ep_context_file_path{nullptr};  // Specify file name to dump EP context node model.
   int trt_ep_context_embed_mode{0};  // Specify EP context embed mode. Default 0 = context is engine cache path, 1 = context is engine binary data
-  int trt_ep_context_compute_capability_enable{0};  // Add GPU compute capability as an EP context node's attribute
+  int trt_ep_context_compute_capability_enable{1};  // Add GPU compute capability as an EP context node's attribute and check it against the compute capability when running
   const char* trt_engine_cache_prefix{nullptr};  // specify engine cache prefix
 };
diff --git a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h
index 104e024c43405..9dafffc79c523 100644
--- a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h
+++ b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h
@@ -251,7 +251,8 @@ static const char* const kOrtSessionOptionEpContextFilePath = "ep.context_file_p
 // "1": dump the EP context into the Onnx model. (default).
 static const char* const kOrtSessionOptionEpContextEmbedMode = "ep.context_embed_mode";
 
-// Enable to check whether the hardware architecture matches the EP context node's "hardware_architecture" attribute.
+// Enable to dump the EP context node with "hardware_architecture" attribute and check this attribute against the
+// hardware architecture when inferencing.
 // "0": disable. (default)
 // "1": enable.
static const char* const kOrtSessionOptionEpContextHardwareArchitectureEnable = "ep.context_hardware_architecture_enable"; \ No newline at end of file diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc index 661bfd6603879..b7fbf60e304ff 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc @@ -107,6 +107,7 @@ ONNX_NAMESPACE::ModelProto* CreateCtxNodeModel(const GraphViewer& graph_viewer, engine_data_str.assign(engine_data, size); } attr_1->set_s(engine_data_str); + LOGS_DEFAULT(WARNING) << EPCONTEXT_WARNING; } else { attr_1->set_s(engine_cache_path); } @@ -269,7 +270,7 @@ bool TensorRTCacheModelHandler::ValidateEPCtxNode(const GraphViewer& graph_viewe auto& attrs = node->GetAttributes(); // Check hardware_architecture(compute_capability) if it's present as an attribute - if (attrs.count(COMPUTE_CAPABILITY) > 0) { + if (compute_capability_enable_ && attrs.count(COMPUTE_CAPABILITY) > 0) { std::string model_compute_capability = attrs.at(COMPUTE_CAPABILITY).s(); if (model_compute_capability != compute_capability_) { LOGS_DEFAULT(ERROR) << "The compute capability of the engine cache doesn't match with the GPU's compute capability"; @@ -297,6 +298,8 @@ bool TensorRTCacheModelHandler::ValidateEPCtxNode(const GraphViewer& graph_viewe return false; } } + } else if (embed_mode == 1) { + LOGS_DEFAULT(WARNING) << EPCONTEXT_WARNING; } } return true; diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h index 50f235740932c..897bf123f8596 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h @@ -16,6 +16,9 @@ static const std::string EMBED_MODE = "embed_mode"; static const std::string EP_CACHE_CONTEXT = "ep_cache_context"; static const std::string COMPUTE_CAPABILITY = "hardware_architecture"; static const std::string EPCONTEXT_OP_DOMAIN = "com.microsoft"; +static const std::string EPCONTEXT_WARNING = "It's suggested to set the ORT graph optimization level to 0 and \ + make \"embed_mode\" to 0 (\"ep_cache_context\" is the cache path)\ + for the best model loading time"; bool GraphHasCtxNode(const GraphViewer& graph_viewer); const onnxruntime::Path& GetModelPath(const GraphViewer& graph_viewer); @@ -41,7 +44,8 @@ class TensorRTCacheModelHandler { public: TensorRTCacheModelHandler(std::unique_ptr* trt_engine, nvinfer1::IRuntime* trt_runtime, - std::string compute_capability) : trt_engine_(trt_engine), trt_runtime_(trt_runtime), compute_capability_(compute_capability) { + std::string compute_capability, + bool compute_capability_enable = true) : trt_engine_(trt_engine), trt_runtime_(trt_runtime), compute_capability_(compute_capability), compute_capability_enable_(compute_capability_enable) { } ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(TensorRTCacheModelHandler); @@ -54,5 +58,6 @@ class TensorRTCacheModelHandler { nvinfer1::IRuntime* trt_runtime_; std::filesystem::path engine_cache_path_; std::string compute_capability_; + bool compute_capability_enable_; }; // TRTCacheModelHandler } // namespace onnxruntime diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index 88c7cce140ae3..a3d901d8dd14f 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ 
b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -3591,7 +3591,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromPrecompiledEngine(con std::unordered_map output_types; // TRT engine output name -> ORT output tensor type // Get engine binary data and deserialize it - auto trt_cache_model_handler = TensorRTCacheModelHandler(&trt_engine, runtime_.get(), compute_capability_); + auto trt_cache_model_handler = TensorRTCacheModelHandler(&trt_engine, runtime_.get(), compute_capability_, ep_context_compute_capability_enable_); auto status = trt_cache_model_handler.GetEpContextFromGraph(graph_body_viewer); if (status != Status::OK()) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, status.ErrorMessage()); From 69ed710e0a5f9790b8bc2feb5a7a7946b317f694 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 15 Jan 2024 22:56:46 +0000 Subject: [PATCH 06/25] support trt plugins for the script --- .../gen_trt_engine_wrapper_onnx_model.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/onnxruntime/python/tools/tensorrt/gen_trt_engine_wrapper_onnx_model.py b/onnxruntime/python/tools/tensorrt/gen_trt_engine_wrapper_onnx_model.py index 717a0816247e7..92e92699299d5 100644 --- a/onnxruntime/python/tools/tensorrt/gen_trt_engine_wrapper_onnx_model.py +++ b/onnxruntime/python/tools/tensorrt/gen_trt_engine_wrapper_onnx_model.py @@ -15,6 +15,7 @@ def __init__(self, args): engine_cache_path = args.trt_engine_cache_path self.model_name = args.model_name self.dynamic_dim_count = 0 + self.plugins = args.plugins # Get serialized engine from engine cache with open(engine_cache_path, "rb") as file: @@ -25,8 +26,15 @@ def __init__(self, args): else: ep_cache_context_content = engine_cache_path - # Deserialize an TRT engine logger = trt.Logger(trt.Logger.WARNING) + + # Enable TRT plugins + trt.init_libnvinfer_plugins(logger, "") + if len(self.plugins): + import ctypes + ctypes.CDLL(self.plugins) + + # Deserialize an TRT engine runtime = trt.Runtime(logger) engine = runtime.deserialize_cuda_engine(engine_buffer) num_bindings = engine.num_bindings @@ -165,6 +173,14 @@ def main(): default="trt_engine_wrapper.onnx", type=str, ) + parser.add_argument( + "--plugins", + help="List of plugin paths to load", + required=False, + default=[], + nargs="+", + type=str, + ) args = parser.parse_args() ctor = TensorRTEngineWrapperCreator(args) ctor.create_model() From 9111d135bc3431cd5eed210c0e8aeb5562f33418 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Tue, 16 Jan 2024 21:10:55 +0000 Subject: [PATCH 07/25] fix bug for minimal build --- onnxruntime/core/session/provider_bridge_ort.cc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 784dd98fd952f..943d0e0a0d277 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1428,6 +1428,7 @@ OrtTensorRTProviderOptionsV2 OrtTensorRTProviderOptionsToOrtTensorRTProviderOpti return trt_options_converted; } +#ifdef USE_TENSORRT // Apply configs from session options to TensorRT provider options V2 that are needed for TensorRT EP. // For example, EP context configs. 
void UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(OrtSessionOptions* session_options, OrtTensorRTProviderOptionsV2* tensorrt_options) { @@ -1455,6 +1456,7 @@ void UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(OrtSessionOptio LOGS_DEFAULT(VERBOSE) << "User specified context hardware architecture enable: " << tensorrt_options->trt_ep_context_compute_capability_enable; } } +#endif std::shared_ptr TensorrtProviderFactoryCreator::Create(int device_id) { return s_library_tensorrt.Get().CreateExecutionProviderFactory(device_id); @@ -1467,7 +1469,11 @@ std::shared_ptr TensorrtProviderFactoryCreator::Creat std::shared_ptr TensorrtProviderFactoryCreator::Create(void* session_options, const OrtTensorRTProviderOptions* provider_options) { OrtTensorRTProviderOptionsV2 trt_options_converted = onnxruntime::OrtTensorRTProviderOptionsToOrtTensorRTProviderOptionsV2(provider_options); +#ifdef USE_TENSORRT onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(reinterpret_cast(session_options), &trt_options_converted); +#else + ORT_UNUSED_PARAMETER(session_options); +#endif return s_library_tensorrt.Get().CreateExecutionProviderFactory(&trt_options_converted); } @@ -1481,7 +1487,11 @@ std::shared_ptr TensorrtProviderFactoryCreator::Creat // Note: No need to worry about tensorrt_options being a local variable, CreateExecutionProviderFactory() in TRT EP will // create a factory object that copies any provider options from tensorrt_options including "const char*" provider options. OrtTensorRTProviderOptionsV2 tensorrt_options = *provider_options; // copy and assign from provider_options +#ifdef USE_TENSORRT onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(reinterpret_cast(session_options), &tensorrt_options); +#else + ORT_UNUSED_PARAMETER(session_options); +#endif return s_library_tensorrt.Get().CreateExecutionProviderFactory(&tensorrt_options); } From f7fe5e7518d5a7487768bc732b7ad92af96facac Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Tue, 16 Jan 2024 21:55:57 +0000 Subject: [PATCH 08/25] fix bug for minimal build --- .../core/session/provider_bridge_ort.cc | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 943d0e0a0d277..f715a07acdcb7 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1428,7 +1428,7 @@ OrtTensorRTProviderOptionsV2 OrtTensorRTProviderOptionsToOrtTensorRTProviderOpti return trt_options_converted; } -#ifdef USE_TENSORRT +#if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) // Apply configs from session options to TensorRT provider options V2 that are needed for TensorRT EP. // For example, EP context configs. 
void UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(OrtSessionOptions* session_options, OrtTensorRTProviderOptionsV2* tensorrt_options) { @@ -1469,11 +1469,13 @@ std::shared_ptr TensorrtProviderFactoryCreator::Creat std::shared_ptr TensorrtProviderFactoryCreator::Create(void* session_options, const OrtTensorRTProviderOptions* provider_options) { OrtTensorRTProviderOptionsV2 trt_options_converted = onnxruntime::OrtTensorRTProviderOptionsToOrtTensorRTProviderOptionsV2(provider_options); -#ifdef USE_TENSORRT + +#if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(reinterpret_cast(session_options), &trt_options_converted); #else ORT_UNUSED_PARAMETER(session_options); #endif + return s_library_tensorrt.Get().CreateExecutionProviderFactory(&trt_options_converted); } @@ -1487,7 +1489,8 @@ std::shared_ptr TensorrtProviderFactoryCreator::Creat // Note: No need to worry about tensorrt_options being a local variable, CreateExecutionProviderFactory() in TRT EP will // create a factory object that copies any provider options from tensorrt_options including "const char*" provider options. OrtTensorRTProviderOptionsV2 tensorrt_options = *provider_options; // copy and assign from provider_options -#ifdef USE_TENSORRT + +#if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(reinterpret_cast(session_options), &tensorrt_options); #else ORT_UNUSED_PARAMETER(session_options); @@ -1773,7 +1776,11 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT, _In std::shared_ptr factory; +#if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) auto ep_context_cache_enabled_from_sess_options = (options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEnable, "0") != "0"; +#else + auto ep_context_cache_enabled_from_sess_options = false; +#endif // If EP context configs are provided in session options, we need to propagate them to provider options if (ep_context_cache_enabled_from_sess_options) { @@ -1921,8 +1928,13 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT_V2, std::shared_ptr factory; +#if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) auto ep_context_cache_enabled_from_provider_options = tensorrt_options->trt_dump_ep_context_model != 0; auto ep_context_cache_enabled_from_sess_options = (options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEnable, "0") != "0"; +#else + auto ep_context_cache_enabled_from_provider_options = false; + auto ep_context_cache_enabled_from_sess_options = false; +#endif // If EP context configs are provided in session options, we need to propagate them to provider options. 
However, // if provider options already have the EP context configs provided, the configs in session options will be ignored From ffffb51cfbf5575e783ded6bcc69a7abc329848e Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Wed, 17 Jan 2024 18:00:44 +0000 Subject: [PATCH 09/25] remove trt_ep_context_compute_capability_check and only show the warning if not matched --- .../providers/tensorrt/tensorrt_provider_options.h | 1 - .../session/onnxruntime_session_options_config_keys.h | 8 +------- .../core/providers/tensorrt/onnx_ctx_model_helper.cc | 11 +++++------ .../core/providers/tensorrt/onnx_ctx_model_helper.h | 4 +--- .../providers/tensorrt/tensorrt_execution_provider.cc | 6 ++---- .../providers/tensorrt/tensorrt_execution_provider.h | 1 - .../tensorrt/tensorrt_execution_provider_info.cc | 5 ----- .../tensorrt/tensorrt_execution_provider_info.h | 1 - .../providers/tensorrt/tensorrt_provider_factory.cc | 1 - onnxruntime/core/session/provider_bridge_ort.cc | 7 +------ onnxruntime/python/onnxruntime_pybind_state.cc | 8 -------- .../test/providers/tensorrt/tensorrt_basic_test.cc | 10 ++++++++++ 12 files changed, 20 insertions(+), 43 deletions(-) diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h index 2443fde022415..1d9af3f18d184 100644 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h @@ -51,6 +51,5 @@ struct OrtTensorRTProviderOptionsV2 { int trt_dump_ep_context_model{0}; // Dump EP context node model const char* trt_ep_context_file_path{nullptr}; // Specify file name to dump EP context node model. int trt_ep_context_embed_mode{0}; // Specify EP context embed mode. Default 0 = context is engine cache path, 1 = context is engine binary data - int trt_ep_context_compute_capability_enable{1}; // Add GPU compute capability as an EP context node's attribute and check it against the compute capability when running const char* trt_engine_cache_prefix{nullptr}; // specify engine cache prefix }; diff --git a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h index 9dafffc79c523..df79cb6e5b21b 100644 --- a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h +++ b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h @@ -249,10 +249,4 @@ static const char* const kOrtSessionOptionEpContextFilePath = "ep.context_file_p // Flag to specify whether to dump the EP context into the Onnx model. // "0": dump the EP context into separate file, keep the file name in the Onnx model. // "1": dump the EP context into the Onnx model. (default). -static const char* const kOrtSessionOptionEpContextEmbedMode = "ep.context_embed_mode"; - -// Enable to dump the EP context node with "hardware_architecture" attribute and check this attribute against the -// hardware architecture when inferencing. -// "0": disable. (default) -// "1": enable. 
-static const char* const kOrtSessionOptionEpContextHardwareArchitectureEnable = "ep.context_hardware_architecture_enable"; \ No newline at end of file +static const char* const kOrtSessionOptionEpContextEmbedMode = "ep.context_embed_mode"; \ No newline at end of file diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc index b7fbf60e304ff..0bf0cd2635f13 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc @@ -269,14 +269,13 @@ bool TensorRTCacheModelHandler::ValidateEPCtxNode(const GraphViewer& graph_viewe auto node = graph_viewer.GetNode(0); auto& attrs = node->GetAttributes(); - // Check hardware_architecture(compute_capability) if it's present as an attribute - if (compute_capability_enable_ && attrs.count(COMPUTE_CAPABILITY) > 0) { + // Show the warning if compute capability is not matched + if (attrs.count(COMPUTE_CAPABILITY) > 0) { std::string model_compute_capability = attrs.at(COMPUTE_CAPABILITY).s(); if (model_compute_capability != compute_capability_) { - LOGS_DEFAULT(ERROR) << "The compute capability of the engine cache doesn't match with the GPU's compute capability"; - LOGS_DEFAULT(ERROR) << "The compute capability of the engine cache: " << model_compute_capability; - LOGS_DEFAULT(ERROR) << "The compute capability of the GPU: " << compute_capability_; - return false; + LOGS_DEFAULT(WARNING) << "[TensorRT EP] Engine was compiled for a different compatibility level and might not work or perform suboptimal"; + LOGS_DEFAULT(WARNING) << "[TensorRT EP] The compute capability of the engine: " << model_compute_capability; + LOGS_DEFAULT(WARNING) << "[TensorRT EP] The compute capability of the GPU: " << compute_capability_; } } diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h index 897bf123f8596..90fbdb7537bec 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h @@ -44,8 +44,7 @@ class TensorRTCacheModelHandler { public: TensorRTCacheModelHandler(std::unique_ptr* trt_engine, nvinfer1::IRuntime* trt_runtime, - std::string compute_capability, - bool compute_capability_enable = true) : trt_engine_(trt_engine), trt_runtime_(trt_runtime), compute_capability_(compute_capability), compute_capability_enable_(compute_capability_enable) { + std::string compute_capability) : trt_engine_(trt_engine), trt_runtime_(trt_runtime), compute_capability_(compute_capability) { } ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(TensorRTCacheModelHandler); @@ -58,6 +57,5 @@ class TensorRTCacheModelHandler { nvinfer1::IRuntime* trt_runtime_; std::filesystem::path engine_cache_path_; std::string compute_capability_; - bool compute_capability_enable_; }; // TRTCacheModelHandler } // namespace onnxruntime diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index a3d901d8dd14f..bfa6a2cc40834 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -1079,8 +1079,6 @@ Status BindKernelOutput(Ort::KernelContext& ctx, char const* output_name, size_t output_index, size_t output_type, - std::vector>& scratch_buffers, - OrtAllocator* alloc, cudaStream_t stream) { auto allocator = 
allocator_map[output_name].get(); auto& shape = allocator->getOutputShape(); @@ -3537,7 +3535,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView if (index_iter != output_indexes.end()) { output_index = index_iter->second; } - auto status = BindKernelOutput(ctx, &mem_info, dds_output_allocator_map, output_name, output_index, output_type, scratch_buffers, alloc, stream); + auto status = BindKernelOutput(ctx, &mem_info, dds_output_allocator_map, output_name, output_index, output_type, stream); if (status != Status::OK()) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, status.ErrorMessage()); } @@ -3818,7 +3816,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromPrecompiledEngine(con if (index_iter != output_indexes.end()) { output_index = index_iter->second; } - auto status = BindKernelOutput(ctx, &mem_info, dds_output_allocator_map, output_name, output_index, output_type, scratch_buffers, alloc, stream); + auto status = BindKernelOutput(ctx, &mem_info, dds_output_allocator_map, output_name, output_index, output_type, stream); if (status != Status::OK()) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, status.ErrorMessage()); } diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h index 7216a6da6839c..86645fabd36d9 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h @@ -304,7 +304,6 @@ class TensorrtExecutionProvider : public IExecutionProvider { bool dump_ep_context_model_ = false; std::string ep_context_file_path_; int ep_context_embed_mode_ = 0; - bool ep_context_compute_capability_enable_ = false; std::unique_ptr model_proto_ = ONNX_NAMESPACE::ModelProto::Create(); std::unordered_set control_flow_op_set_ = {"If", "Loop", "Scan"}; diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc index 1143af60486ea..ba9251c71bced 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc @@ -50,7 +50,6 @@ constexpr const char* kCudaGraphEnable = "trt_cuda_graph_enable"; constexpr const char* kEpContextEmbedMode = "trt_ep_context_embed_mode"; constexpr const char* kEpContextFilePath = "trt_ep_context_file_path"; constexpr const char* kDumpEpContextModel = "trt_dump_ep_context_model"; -constexpr const char* kEpContextComputeCapabilityEnable = "trt_ep_context_compute_capability_enable"; } // namespace provider_option_names } // namespace tensorrt @@ -106,7 +105,6 @@ TensorrtExecutionProviderInfo TensorrtExecutionProviderInfo::FromProviderOptions .AddAssignmentToReference(tensorrt::provider_option_names::kDumpEpContextModel, info.dump_ep_context_model) .AddAssignmentToReference(tensorrt::provider_option_names::kEpContextFilePath, info.ep_context_file_path) .AddAssignmentToReference(tensorrt::provider_option_names::kEpContextEmbedMode, info.ep_context_embed_mode) - .AddAssignmentToReference(tensorrt::provider_option_names::kEpContextComputeCapabilityEnable, info.ep_context_compute_capability_enable) .Parse(options)); // add new provider option here. 
return info; @@ -152,7 +150,6 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const TensorrtE {tensorrt::provider_option_names::kDumpEpContextModel, MakeStringWithClassicLocale(info.dump_ep_context_model)}, {tensorrt::provider_option_names::kEpContextFilePath, MakeStringWithClassicLocale(info.ep_context_file_path)}, {tensorrt::provider_option_names::kEpContextEmbedMode, MakeStringWithClassicLocale(info.ep_context_embed_mode)}, - {tensorrt::provider_option_names::kEpContextComputeCapabilityEnable, MakeStringWithClassicLocale(info.ep_context_compute_capability_enable)}, }; return options; } @@ -209,7 +206,6 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const OrtTensor {tensorrt::provider_option_names::kEpContextFilePath, kEpContextFilePath_}, {tensorrt::provider_option_names::kDumpEpContextModel, MakeStringWithClassicLocale(info.trt_dump_ep_context_model)}, {tensorrt::provider_option_names::kEpContextEmbedMode, MakeStringWithClassicLocale(info.trt_ep_context_embed_mode)}, - {tensorrt::provider_option_names::kEpContextComputeCapabilityEnable, MakeStringWithClassicLocale(info.trt_ep_context_compute_capability_enable)}, }; return options; } @@ -305,6 +301,5 @@ void TensorrtExecutionProviderInfo::UpdateProviderOptions(void* provider_options trt_provider_options_v2.trt_dump_ep_context_model = internal_options.dump_ep_context_model; trt_provider_options_v2.trt_ep_context_embed_mode = internal_options.ep_context_embed_mode; trt_provider_options_v2.trt_ep_context_file_path = copy_string_if_needed(internal_options.ep_context_file_path); - trt_provider_options_v2.trt_ep_context_compute_capability_enable = internal_options.ep_context_compute_capability_enable; } } // namespace onnxruntime diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h index 2518bdd5337a0..80424b8d6d196 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h @@ -54,7 +54,6 @@ struct TensorrtExecutionProviderInfo { bool dump_ep_context_model{false}; std::string ep_context_file_path{""}; int ep_context_embed_mode{0}; - bool ep_context_compute_capability_enable{0}; std::string engine_cache_prefix{""}; static TensorrtExecutionProviderInfo FromProviderOptions(const ProviderOptions& options); diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc index 722f24c3fd6ae..568da57a50956 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc @@ -112,7 +112,6 @@ struct Tensorrt_Provider : Provider { info.dump_ep_context_model = options.trt_dump_ep_context_model != 0; info.ep_context_file_path = options.trt_ep_context_file_path == nullptr ? "" : options.trt_ep_context_file_path; info.ep_context_embed_mode = options.trt_ep_context_embed_mode; - info.ep_context_compute_capability_enable = options.trt_ep_context_compute_capability_enable != 0; info.engine_cache_prefix = options.trt_engine_cache_prefix == nullptr ? 
"" : options.trt_engine_cache_prefix; return std::make_shared(info); diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index f715a07acdcb7..4b6c2a491334f 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -89,7 +89,7 @@ using IndexedSubGraph_MetaDef = IndexedSubGraph::MetaDef; #include "core/providers/cann/cann_provider_options.h" #include "core/providers/dnnl/dnnl_provider_options.h" -#ifdef USE_TENSORRT +#if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) #include "core/session/onnxruntime_session_options_config_keys.h" #endif @@ -1422,7 +1422,6 @@ OrtTensorRTProviderOptionsV2 OrtTensorRTProviderOptionsToOrtTensorRTProviderOpti trt_options_converted.trt_dump_ep_context_model = 0; trt_options_converted.trt_ep_context_file_path = ""; trt_options_converted.trt_ep_context_embed_mode = 0; - trt_options_converted.trt_ep_context_compute_capability_enable = 0; trt_options_converted.trt_engine_cache_prefix = ""; return trt_options_converted; @@ -1450,10 +1449,6 @@ void UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(OrtSessionOptio LOGS_DEFAULT(VERBOSE) << "Invalid ep.context_embed_mode: " << embed_mode << " only 0 or 1 allowed. Set to 1."; } LOGS_DEFAULT(VERBOSE) << "User specified context cache embed mode: " << tensorrt_options->trt_ep_context_embed_mode; - - auto context_hardware_arch_enable = (session_options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextHardwareArchitectureEnable, "0") != "0"; - tensorrt_options->trt_ep_context_compute_capability_enable = context_hardware_arch_enable; - LOGS_DEFAULT(VERBOSE) << "User specified context hardware architecture enable: " << tensorrt_options->trt_ep_context_compute_capability_enable; } } #endif diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 9ce3a9a5fa07d..03ba54007b77e 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -741,14 +741,6 @@ std::unique_ptr CreateExecutionProviderInstance( } else { ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_ep_context_embed_mode' should be a positive integer number i.e. '1'.\n"); } - } else if (option.first == "trt_ep_context_compute_capability_enable") { - if (option.second == "True" || option.second == "true") { - params.trt_ep_context_compute_capability_enable = true; - } else if (option.second == "False" || option.second == "false") { - params.trt_ep_context_compute_capability_enable = false; - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_ep_context_compute_capability_enable' should be 'True' or 'False'. 
Default value is 'False'.\n"); - } } else { ORT_THROW("Invalid TensorRT EP option: ", option.first); } diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index 508739ae1d235..4e20fb976b49b 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -191,6 +191,8 @@ void RunWithOneSessionSingleThreadInference(std::string model_name, std::string OrtTensorRTProviderOptionsV2 params; params.trt_engine_cache_enable = 1; params.trt_engine_cache_prefix = "TRTEP_Cache_Test"; + params.trt_dump_ep_context_model = 1; + params.trt_ep_context_file_path = "EP_Context_model.onnx"; std::unique_ptr execution_provider = TensorrtExecutionProviderWithOptions(¶ms); EXPECT_TRUE(session_object.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); auto status = session_object.Load(model_name); @@ -209,6 +211,9 @@ void RunWithOneSessionSingleThreadInference(std::string model_name, std::string // Verify on cache with customized prefix ASSERT_TRUE(HasCacheFileWithPrefix(params.trt_engine_cache_prefix)); + + // Verify EP context model with user provided name + ASSERT_TRUE(HasCacheFileWithPrefix(params.trt_ep_context_file_path)); } void RunWithOneSessionMultiThreadsInference(std::string model_name, std::string sess_log_id, bool has_non_zero_node = false) { @@ -448,6 +453,8 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { params.trt_engine_cache_enable = 1; params.trt_engine_cache_prefix = "TRTEP_Cache_Test"; + params.trt_dump_ep_context_model = 1; + params.trt_ep_context_file_path = "EP_Context_model.onnx"; std::unique_ptr execution_provider = TensorrtExecutionProviderWithOptions(¶ms); EXPECT_TRUE(session_object.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); auto status = session_object.Load(model_name); @@ -576,6 +583,9 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { // Verify on cache with customized prefix ASSERT_TRUE(HasCacheFileWithPrefix(params.trt_engine_cache_prefix)); + // Verify EP context model with user provided name + ASSERT_TRUE(HasCacheFileWithPrefix(params.trt_ep_context_file_path)); + if (input_type.compare("static") == 0) { // Can't run inference since input shape changes but the engine is built with static input ASSERT_FALSE(status.IsOK()); From 2487933d869b39b9d427a34060664e8b8eb257b9 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Wed, 17 Jan 2024 18:33:46 +0000 Subject: [PATCH 10/25] remove trt_ep_context_compute_capability_check (cont.) 
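
[Editorial aside] The unit tests in this patch set trt_dump_ep_context_model and trt_ep_context_file_path directly on the options struct. From application code the same settings are normally reached through the string provider-option keys parsed in tensorrt_execution_provider_info.cc; the sketch below shows that route. It assumes the public C API entry points (CreateTensorRTProviderOptions, UpdateTensorRTProviderOptions) behave as in mainline ONNX Runtime and is illustrative rather than part of this patch.

```cpp
// Minimal sketch: request an EP-context model dump through the string
// provider-option keys (key names taken from tensorrt_execution_provider_info.cc).
#include "onnxruntime_cxx_api.h"

void AppendTrtWithEpContextDump(Ort::SessionOptions& so) {
  OrtTensorRTProviderOptionsV2* trt_options = nullptr;
  Ort::ThrowOnError(Ort::GetApi().CreateTensorRTProviderOptions(&trt_options));

  const char* keys[] = {"trt_engine_cache_enable",
                        "trt_dump_ep_context_model",
                        "trt_ep_context_file_path",
                        "trt_ep_context_embed_mode"};
  const char* values[] = {"1", "1", "EP_Context_model.onnx", "0"};
  Ort::ThrowOnError(Ort::GetApi().UpdateTensorRTProviderOptions(trt_options, keys, values, 4));

  // The TRT EP factory copies the options, so releasing them afterwards is safe.
  so.AppendExecutionProvider_TensorRT_V2(*trt_options);
  Ort::GetApi().ReleaseTensorRTProviderOptions(trt_options);
}
```
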
--- .../providers/tensorrt/onnx_ctx_model_helper.cc | 15 ++++++--------- .../providers/tensorrt/onnx_ctx_model_helper.h | 1 - .../tensorrt/tensorrt_execution_provider.cc | 11 +---------- .../providers/tensorrt/tensorrt_basic_test.cc | 7 +++++++ 4 files changed, 14 insertions(+), 20 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc index 0bf0cd2635f13..d91ceb4211d44 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc @@ -74,7 +74,6 @@ ONNX_NAMESPACE::ModelProto* CreateCtxNodeModel(const GraphViewer& graph_viewer, char* engine_data, size_t size, const int64_t embed_mode, - bool compute_capability_enable, std::string compute_capability, const logging::Logger* logger) { auto model_build = graph_viewer.CreateModel(*logger); @@ -111,18 +110,16 @@ ONNX_NAMESPACE::ModelProto* CreateCtxNodeModel(const GraphViewer& graph_viewer, } else { attr_1->set_s(engine_cache_path); } + attr_2->set_name(COMPUTE_CAPABILITY); + attr_2->set_type(onnx::AttributeProto_AttributeType_STRING); + attr_2->set_s(compute_capability); + auto node_attributes = ONNX_NAMESPACE::NodeAttributes::Create(); - int num_attributes = compute_capability_enable ? 3 : 2; + int num_attributes = 3; node_attributes->reserve(num_attributes); node_attributes->emplace(EMBED_MODE, *attr_0); node_attributes->emplace(EP_CACHE_CONTEXT, *attr_1); - - if (compute_capability_enable) { - attr_2->set_name(COMPUTE_CAPABILITY); - attr_2->set_type(onnx::AttributeProto_AttributeType_STRING); - attr_2->set_s(compute_capability); - node_attributes->emplace(COMPUTE_CAPABILITY, *attr_2); - } + node_attributes->emplace(COMPUTE_CAPABILITY, *attr_2); // Create EP context node graph_build.AddNode(EPCONTEXT_OP, EPCONTEXT_OP, "", inputs, outputs, node_attributes.get(), EPCONTEXT_OP_DOMAIN); diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h index 90fbdb7537bec..5c27300ef9d17 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h @@ -28,7 +28,6 @@ ONNX_NAMESPACE::ModelProto* CreateCtxNodeModel(const GraphViewer& graph_viewer, char* engine_data, size_t size, const int64_t embed_mode, - bool compute_capability_enable, std::string compute_capability, const logging::Logger* logger); std::string GetCtxNodeModelPath(const std::string& ep_context_file_path, diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index bfa6a2cc40834..a6b97a0c21d07 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -1381,7 +1381,6 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv dump_ep_context_model_ = info.dump_ep_context_model; ep_context_file_path_ = info.ep_context_file_path; ep_context_embed_mode_ = info.ep_context_embed_mode; - ep_context_compute_capability_enable_ = info.ep_context_compute_capability_enable; } else { try { const std::string max_partition_iterations_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kMaxPartitionIterations); @@ -1552,11 +1551,6 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv ep_context_embed_mode_ = 
std::stoi(ep_context_embed_mode_env); } - const std::string ep_context_compute_capability_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kEpContextComputeCapabilityEnable); - if (!ep_context_compute_capability_env.empty()) { - ep_context_compute_capability_enable_ = (std::stoi(ep_context_compute_capability_env) == 0 ? false : true); - } - } catch (const std::invalid_argument& ex) { LOGS_DEFAULT(WARNING) << "[TensorRT EP] Invalid Argument (from environment variables): " << ex.what(); } catch (const std::out_of_range& ex) { @@ -1699,7 +1693,6 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv << ", trt_dump_ep_context_model: " << dump_ep_context_model_ << ", trt_ep_context_file_path: " << ep_context_file_path_ << ", trt_ep_context_embed_mode: " << ep_context_embed_mode_ - << ", trt_ep_context_compute_capability_enable: " << ep_context_compute_capability_enable_ << ", trt_cache_prefix: " << cache_prefix_; } @@ -3003,7 +2996,6 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView reinterpret_cast(serialized_engine->data()), serialized_engine->size(), ep_context_embed_mode_, - ep_context_compute_capability_enable_, compute_capability_, GetLogger())}; DumpCtxNodeModel(model_proto.get(), ctx_model_path_); @@ -3071,7 +3063,6 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView nullptr, 0, ep_context_embed_mode_, - ep_context_compute_capability_enable_, compute_capability_, GetLogger())); if (ep_context_embed_mode_ == 0) { @@ -3589,7 +3580,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromPrecompiledEngine(con std::unordered_map output_types; // TRT engine output name -> ORT output tensor type // Get engine binary data and deserialize it - auto trt_cache_model_handler = TensorRTCacheModelHandler(&trt_engine, runtime_.get(), compute_capability_, ep_context_compute_capability_enable_); + auto trt_cache_model_handler = TensorRTCacheModelHandler(&trt_engine, runtime_.get(), compute_capability_); auto status = trt_cache_model_handler.GetEpContextFromGraph(graph_body_viewer); if (status != Status::OK()) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, status.ErrorMessage()); diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index 4e20fb976b49b..a1544b617b732 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -353,6 +353,13 @@ TEST(TensorrtExecutionProviderTest, TRTModelIdGeneratorUsingModelHashing) { ASSERT_EQ(model_hash, model_hash3) << "model 1&3 are same models and they have same hash, no matter where they are loaded"; } +TEST(TensorrtExecutionProviderTest, EPContextNode) { + std::string model_name = "trt_execution_provider_multithreading_test.onnx"; + std::string graph_name = "multithreading_test"; + std::string sess_log_id = "TRTEPMultiThreadingTestWithOneSessionMultiThreads"; + std::vector dims = {1, 3, 2}; +} + TEST(TensorrtExecutionProviderTest, TRTPluginsCustomOpTest) { std::string model_name = "testdata/trt_plugin_custom_op_test.onnx"; SessionOptions so; From 178e86fd60ebe48af6d12a7a368c6bb69bc66482 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Wed, 17 Jan 2024 22:39:47 +0000 Subject: [PATCH 11/25] add unit test --- .../providers/tensorrt/tensorrt_basic_test.cc | 96 +++++++++++++++++-- 1 file changed, 90 insertions(+), 6 deletions(-) diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc 
b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index a1544b617b732..dc860839cd147 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -122,9 +122,15 @@ void CreateBaseModel(std::string model_name, status = onnxruntime::Model::Save(model, model_name); } -bool HasCacheFileWithPrefix(const std::string& prefix) { - const std::filesystem::path current_dir = std::filesystem::current_path(); - for (const auto& entry : std::filesystem::directory_iterator(current_dir)) { +bool HasCacheFileWithPrefix(const std::string& prefix, std::string file_dir = "") { + std::filesystem::path target_dir; + if (file_dir.empty()) { + target_dir = std::filesystem::current_path(); + } else { + target_dir = std::filesystem::path(file_dir); + } + + for (const auto& entry : std::filesystem::directory_iterator(target_dir)) { if (entry.is_regular_file()) { std::string filename = entry.path().filename().string(); if (filename.rfind(prefix, 0) == 0) { @@ -354,10 +360,88 @@ TEST(TensorrtExecutionProviderTest, TRTModelIdGeneratorUsingModelHashing) { } TEST(TensorrtExecutionProviderTest, EPContextNode) { - std::string model_name = "trt_execution_provider_multithreading_test.onnx"; - std::string graph_name = "multithreading_test"; - std::string sess_log_id = "TRTEPMultiThreadingTestWithOneSessionMultiThreads"; + std::string model_name = "EPContextNode_test.onnx"; + std::string graph_name = "EPContextNode_test"; + std::string sess_log_id = "EPContextNode_test"; std::vector dims = {1, 3, 2}; + CreateBaseModel(model_name, graph_name, dims); + + SessionOptions so; + so.session_logid = sess_log_id; + RunOptions run_options; + run_options.run_tag = so.session_logid; + InferenceSession session_object{so, GetEnvironment()}; + auto cuda_provider = DefaultCudaExecutionProvider(); + auto cpu_allocator = cuda_provider->CreatePreferredAllocators()[1]; + std::vector dims_mul_x = {1, 3, 2}; + std::vector values_mul_x = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + OrtValue ml_value_x; + CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_x); + OrtValue ml_value_y; + CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_y); + OrtValue ml_value_z; + CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_z); + NameMLValMap feeds; + feeds.insert(std::make_pair("X", ml_value_x)); + feeds.insert(std::make_pair("Y", ml_value_y)); + feeds.insert(std::make_pair("Z", ml_value_z)); + + // prepare outputs + std::vector output_names; + output_names.push_back("M"); + + // prepare expected inputs and outputs + std::vector expected_dims_mul_m = {1, 3, 2}; + std::vector expected_values_mul_m = {3.0f, 6.0f, 9.0f, 12.0f, 15.0f, 18.0f}; + + // Test dumping EP context model to provided path + OrtTensorRTProviderOptionsV2 params; + params.trt_engine_cache_enable = 1; + params.trt_dump_ep_context_model = 1; + params.trt_ep_context_file_path = "EP_Context_model.onnx"; + std::unique_ptr execution_provider = TensorrtExecutionProviderWithOptions(¶ms); + EXPECT_TRUE(session_object.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); + auto status = session_object.Load(model_name); + ASSERT_TRUE(status.IsOK()); + status = session_object.Initialize(); + ASSERT_TRUE(status.IsOK()); + // "EP_Context_model.onnx" should be created + ASSERT_TRUE(HasCacheFileWithPrefix(params.trt_ep_context_file_path)); + + // Test dumping EP context model to provided path + InferenceSession session_object2{so, GetEnvironment()}; + 
OrtTensorRTProviderOptionsV2 params2; + params2.trt_engine_cache_enable = 1; + params2.trt_dump_ep_context_model = 1; + params2.trt_engine_cache_path = "./trt_engine_cache"; + params2.trt_ep_context_file_path = "EP_Context_model.onnx"; + execution_provider = TensorrtExecutionProviderWithOptions(¶ms2); + EXPECT_TRUE(session_object2.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); + status = session_object2.Load(model_name); + ASSERT_TRUE(status.IsOK()); + status = session_object2.Initialize(); + ASSERT_TRUE(status.IsOK()); + // "./trt_engine_cache/EP_Context_model.onnx" should be created + ASSERT_TRUE(HasCacheFileWithPrefix(params2.trt_ep_context_file_path, params2.trt_engine_cache_path)); + + // Test EP context model inference + InferenceSession session_object3{so, GetEnvironment()}; + OrtTensorRTProviderOptionsV2 params3; + model_name = "EP_Context_model.onnx"; + execution_provider = TensorrtExecutionProviderWithOptions(¶ms3); + EXPECT_TRUE(session_object3.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); + status = session_object3.Load(model_name); + ASSERT_TRUE(status.IsOK()); + status = session_object3.Initialize(); + ASSERT_TRUE(status.IsOK()); + // run inference + // TRT engine will be created and cached + // TRT profile will be created and cached only for dynamic input shape + // Data in profile, + // X: 1, 3, 3, 2, 2, 2 + // Y: 1, 3, 3, 2, 2, 2 + // Z: 1, 3, 3, 2, 2, 2 + RunSession(session_object3, run_options, feeds, output_names, expected_dims_mul_m, expected_values_mul_m); } TEST(TensorrtExecutionProviderTest, TRTPluginsCustomOpTest) { From a69758d7edebd62ca903ec77aea50905a68c2197 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Wed, 17 Jan 2024 22:41:41 +0000 Subject: [PATCH 12/25] lintrunner -a --- .../providers/tensorrt/onnx_ctx_model_helper.cc | 14 +++++++------- .../providers/tensorrt/onnx_ctx_model_helper.h | 3 ++- onnxruntime/core/session/provider_bridge_ort.cc | 6 +++--- onnxruntime/python/onnxruntime_pybind_state.cc | 12 ++++++------ .../tensorrt/gen_trt_engine_wrapper_onnx_model.py | 1 + .../test/providers/tensorrt/tensorrt_basic_test.cc | 2 +- 6 files changed, 20 insertions(+), 18 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc index d91ceb4211d44..232dbfd882017 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc @@ -137,8 +137,8 @@ ONNX_NAMESPACE::ModelProto* CreateCtxNodeModel(const GraphViewer& graph_viewer, /* * Get "EP context node" model path - * - * + * + * * If ep_context_file_path is provided: * - If ep_context_file_path is a file: * - If it's a file name without any path associated with it, return "engine_cache_path/ep_context_file_path". @@ -146,14 +146,14 @@ ONNX_NAMESPACE::ModelProto* CreateCtxNodeModel(const GraphViewer& graph_viewer, * - If ep_context_file_path is a directory, return "ep_context_file_path/original_model_name_ctx.onnx". * If ep_context_file_path is not provided: * - Return "engine_cache_path/original_model_name_ctx.onnx". 
- * - * + * + * * Example 1: * ep_context_file_path = "/home/user/ep_context_model_foler" * engine_cache_path = "trt_engine.engine" * original_model_path = "model.onnx" * => return "/home/user/ep_context_model_folder/model_ctx.onnx" - * + * * Example 2: * ep_context_file_path = "my_ctx_model.onnx" * engine_cache_path = "/home/user/cache_folder/trt_engine.engine" @@ -165,13 +165,13 @@ ONNX_NAMESPACE::ModelProto* CreateCtxNodeModel(const GraphViewer& graph_viewer, * engine_cache_path = "trt_engine.engine" * original_model_path = "model.onnx" * => return "/home/user2/ep_context_model_foler/my_ctx_model.onnx" - * + * * Example 4: * ep_context_file_path = "" * engine_cache_path = "/home/user3/cache_folder/trt_engine.engine" * original_model_path = "model.onnx" * => return "/home/user3/cache_folder/model_ctx.onnx" - * + * */ std::string GetCtxNodeModelPath(const std::string& ep_context_file_path, const std::string& engine_cache_path, diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h index 5c27300ef9d17..d4f53a1d532c1 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h @@ -16,7 +16,8 @@ static const std::string EMBED_MODE = "embed_mode"; static const std::string EP_CACHE_CONTEXT = "ep_cache_context"; static const std::string COMPUTE_CAPABILITY = "hardware_architecture"; static const std::string EPCONTEXT_OP_DOMAIN = "com.microsoft"; -static const std::string EPCONTEXT_WARNING = "It's suggested to set the ORT graph optimization level to 0 and \ +static const std::string EPCONTEXT_WARNING = + "It's suggested to set the ORT graph optimization level to 0 and \ make \"embed_mode\" to 0 (\"ep_cache_context\" is the cache path)\ for the best model loading time"; diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 4b6c2a491334f..9d26c13ce47a3 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1480,10 +1480,10 @@ std::shared_ptr TensorrtProviderFactoryCreator::Creat std::shared_ptr TensorrtProviderFactoryCreator::Create(void* session_options, const OrtTensorRTProviderOptionsV2* provider_options) { // We need to create a new provider options V2 object and copy from provider_options, due to the "const" object pointed by provider_options can't be modified. - // + // // Note: No need to worry about tensorrt_options being a local variable, CreateExecutionProviderFactory() in TRT EP will // create a factory object that copies any provider options from tensorrt_options including "const char*" provider options. 
- OrtTensorRTProviderOptionsV2 tensorrt_options = *provider_options; // copy and assign from provider_options + OrtTensorRTProviderOptionsV2 tensorrt_options = *provider_options; // copy and assign from provider_options #if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(reinterpret_cast(session_options), &tensorrt_options); @@ -1768,7 +1768,7 @@ ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_MIGraphX, _In_ OrtS ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT, _In_ OrtSessionOptions* options, _In_ const OrtTensorRTProviderOptions* tensorrt_options) { API_IMPL_BEGIN - + std::shared_ptr factory; #if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 03ba54007b77e..f7ed5520727db 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -729,12 +729,12 @@ std::unique_ptr CreateExecutionProviderInstance( ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dump_ep_context_model' should be 'True' or 'False'. Default value is 'False'.\n"); } } else if (option.first == "trt_ep_context_file_path") { - if (!option.second.empty()) { - ep_context_file_path = option.second; - params.trt_ep_context_file_path = ep_context_file_path.c_str(); - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_ep_context_file_path' should be a string.\n"); - } + if (!option.second.empty()) { + ep_context_file_path = option.second; + params.trt_ep_context_file_path = ep_context_file_path.c_str(); + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_ep_context_file_path' should be a string.\n"); + } } else if (option.first == "trt_ep_context_embed_mode") { if (!option.second.empty()) { params.trt_ep_context_embed_mode = std::stoi(option.second); diff --git a/onnxruntime/python/tools/tensorrt/gen_trt_engine_wrapper_onnx_model.py b/onnxruntime/python/tools/tensorrt/gen_trt_engine_wrapper_onnx_model.py index 92e92699299d5..b94c2cb76a635 100644 --- a/onnxruntime/python/tools/tensorrt/gen_trt_engine_wrapper_onnx_model.py +++ b/onnxruntime/python/tools/tensorrt/gen_trt_engine_wrapper_onnx_model.py @@ -32,6 +32,7 @@ def __init__(self, args): trt.init_libnvinfer_plugins(logger, "") if len(self.plugins): import ctypes + ctypes.CDLL(self.plugins) # Deserialize an TRT engine diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index dc860839cd147..a1d19ecbabdcf 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -123,7 +123,7 @@ void CreateBaseModel(std::string model_name, } bool HasCacheFileWithPrefix(const std::string& prefix, std::string file_dir = "") { - std::filesystem::path target_dir; + std::filesystem::path target_dir; if (file_dir.empty()) { target_dir = std::filesystem::current_path(); } else { From c14efe1ca1111fcf91ebdcfbab5251aa42fce1a0 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Thu, 18 Jan 2024 01:58:42 +0000 Subject: [PATCH 13/25] remove newly added two factory create functions --- .../tensorrt_provider_factory_creator.h | 2 - .../core/session/provider_bridge_ort.cc | 49 ++++++++----------- 2 files changed, 20 insertions(+), 31 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory_creator.h 
b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory_creator.h index 96917c8fb8e88..d905003fb7cc1 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory_creator.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory_creator.h @@ -15,8 +15,6 @@ namespace onnxruntime { struct TensorrtProviderFactoryCreator { static std::shared_ptr Create(int device_id); static std::shared_ptr Create(const OrtTensorRTProviderOptions* provider_options); - static std::shared_ptr Create(void* session_options, const OrtTensorRTProviderOptions* provider_options); static std::shared_ptr Create(const OrtTensorRTProviderOptionsV2* provider_options); - static std::shared_ptr Create(void* session_options, const OrtTensorRTProviderOptionsV2* provider_options); }; } // namespace onnxruntime diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 9d26c13ce47a3..d529c9312a4ab 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1462,37 +1462,10 @@ std::shared_ptr TensorrtProviderFactoryCreator::Creat return s_library_tensorrt.Get().CreateExecutionProviderFactory(&trt_options_converted); } -std::shared_ptr TensorrtProviderFactoryCreator::Create(void* session_options, const OrtTensorRTProviderOptions* provider_options) { - OrtTensorRTProviderOptionsV2 trt_options_converted = onnxruntime::OrtTensorRTProviderOptionsToOrtTensorRTProviderOptionsV2(provider_options); - -#if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) - onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(reinterpret_cast(session_options), &trt_options_converted); -#else - ORT_UNUSED_PARAMETER(session_options); -#endif - - return s_library_tensorrt.Get().CreateExecutionProviderFactory(&trt_options_converted); -} - std::shared_ptr TensorrtProviderFactoryCreator::Create(const OrtTensorRTProviderOptionsV2* provider_options) { return s_library_tensorrt.Get().CreateExecutionProviderFactory(provider_options); } -std::shared_ptr TensorrtProviderFactoryCreator::Create(void* session_options, const OrtTensorRTProviderOptionsV2* provider_options) { - // We need to create a new provider options V2 object and copy from provider_options, due to the "const" object pointed by provider_options can't be modified. - // - // Note: No need to worry about tensorrt_options being a local variable, CreateExecutionProviderFactory() in TRT EP will - // create a factory object that copies any provider options from tensorrt_options including "const char*" provider options. 
- OrtTensorRTProviderOptionsV2 tensorrt_options = *provider_options; // copy and assign from provider_options - -#if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) - onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(reinterpret_cast(session_options), &tensorrt_options); -#else - ORT_UNUSED_PARAMETER(session_options); -#endif - return s_library_tensorrt.Get().CreateExecutionProviderFactory(&tensorrt_options); -} - std::shared_ptr MIGraphXProviderFactoryCreator::Create(const OrtMIGraphXProviderOptions* provider_options) { return s_library_migraphx.Get().CreateExecutionProviderFactory(provider_options); } @@ -1779,7 +1752,14 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT, _In // If EP context configs are provided in session options, we need to propagate them to provider options if (ep_context_cache_enabled_from_sess_options) { - factory = onnxruntime::TensorrtProviderFactoryCreator::Create(options, tensorrt_options); + OrtTensorRTProviderOptionsV2 trt_options_converted = onnxruntime::OrtTensorRTProviderOptionsToOrtTensorRTProviderOptionsV2(tensorrt_options); + +#if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) + onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(options, &trt_options_converted); +#else + ORT_UNUSED_PARAMETER(session_options); +#endif + factory = onnxruntime::TensorrtProviderFactoryCreator::Create(&trt_options_converted); } else { factory = onnxruntime::TensorrtProviderFactoryCreator::Create(tensorrt_options); } @@ -1935,7 +1915,18 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT_V2, // if provider options already have the EP context configs provided, the configs in session options will be ignored // since provider options has higher priority than session options. if (!ep_context_cache_enabled_from_provider_options && ep_context_cache_enabled_from_sess_options) { - factory = onnxruntime::TensorrtProviderFactoryCreator::Create(options, tensorrt_options); + // We need to create a new provider options V2 object and copy from provider_options, due to the "const" object pointed by provider_options can't be modified. + // + // Note: No need to worry about tensorrt_options being a local variable, CreateExecutionProviderFactory() in TRT EP will + // create a factory object that copies any provider options from tensorrt_options including "const char*" provider options. 
+ OrtTensorRTProviderOptionsV2 new_tensorrt_options = *tensorrt_options; // copy and assign from tensorrt_options + +#if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) + onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(options, &new_tensorrt_options); +#else + ORT_UNUSED_PARAMETER(session_options); +#endif + factory = onnxruntime::TensorrtProviderFactoryCreator::Create(&new_tensorrt_options); } else { factory = onnxruntime::TensorrtProviderFactoryCreator::Create(tensorrt_options); } From f00d137872565cd854397e07f1c32ea544546cab Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Thu, 18 Jan 2024 17:00:40 +0000 Subject: [PATCH 14/25] fix compile error --- onnxruntime/core/session/provider_bridge_ort.cc | 9 --------- 1 file changed, 9 deletions(-) diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index d529c9312a4ab..5dcb3613946a7 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1754,11 +1754,7 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT, _In if (ep_context_cache_enabled_from_sess_options) { OrtTensorRTProviderOptionsV2 trt_options_converted = onnxruntime::OrtTensorRTProviderOptionsToOrtTensorRTProviderOptionsV2(tensorrt_options); -#if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(options, &trt_options_converted); -#else - ORT_UNUSED_PARAMETER(session_options); -#endif factory = onnxruntime::TensorrtProviderFactoryCreator::Create(&trt_options_converted); } else { factory = onnxruntime::TensorrtProviderFactoryCreator::Create(tensorrt_options); @@ -1916,16 +1912,11 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT_V2, // since provider options has higher priority than session options. if (!ep_context_cache_enabled_from_provider_options && ep_context_cache_enabled_from_sess_options) { // We need to create a new provider options V2 object and copy from provider_options, due to the "const" object pointed by provider_options can't be modified. - // // Note: No need to worry about tensorrt_options being a local variable, CreateExecutionProviderFactory() in TRT EP will // create a factory object that copies any provider options from tensorrt_options including "const char*" provider options. 
OrtTensorRTProviderOptionsV2 new_tensorrt_options = *tensorrt_options; // copy and assign from tensorrt_options -#if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) onnxruntime::UpdateOrtTensorRTProviderOptionsV2FromSessionOptionsConfigs(options, &new_tensorrt_options); -#else - ORT_UNUSED_PARAMETER(session_options); -#endif factory = onnxruntime::TensorrtProviderFactoryCreator::Create(&new_tensorrt_options); } else { factory = onnxruntime::TensorrtProviderFactoryCreator::Create(tensorrt_options); From 997913b34c04caae31b9e1b644067ce6ce744907 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Thu, 18 Jan 2024 19:34:12 +0000 Subject: [PATCH 15/25] fix compile error --- onnxruntime/core/session/provider_bridge_ort.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 5dcb3613946a7..1b3b7de7d89c5 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1746,10 +1746,6 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT, _In #if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) auto ep_context_cache_enabled_from_sess_options = (options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEnable, "0") != "0"; -#else - auto ep_context_cache_enabled_from_sess_options = false; -#endif - // If EP context configs are provided in session options, we need to propagate them to provider options if (ep_context_cache_enabled_from_sess_options) { OrtTensorRTProviderOptionsV2 trt_options_converted = onnxruntime::OrtTensorRTProviderOptionsToOrtTensorRTProviderOptionsV2(tensorrt_options); @@ -1759,6 +1755,11 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT, _In } else { factory = onnxruntime::TensorrtProviderFactoryCreator::Create(tensorrt_options); } +#else + factory = onnxruntime::TensorrtProviderFactoryCreator::Create(tensorrt_options); +#endif + + if (!factory) { return OrtApis::CreateStatus(ORT_FAIL, "SessionOptionsAppendExecutionProvider_Tensorrt: Failed to load shared library"); @@ -1902,10 +1903,6 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT_V2, #if !defined(ORT_MINIMAL_BUILD) && defined(USE_TENSORRT) auto ep_context_cache_enabled_from_provider_options = tensorrt_options->trt_dump_ep_context_model != 0; auto ep_context_cache_enabled_from_sess_options = (options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEnable, "0") != "0"; -#else - auto ep_context_cache_enabled_from_provider_options = false; - auto ep_context_cache_enabled_from_sess_options = false; -#endif // If EP context configs are provided in session options, we need to propagate them to provider options. 
However, // if provider options already have the EP context configs provided, the configs in session options will be ignored @@ -1921,6 +1918,9 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT_V2, } else { factory = onnxruntime::TensorrtProviderFactoryCreator::Create(tensorrt_options); } +#else + factory = onnxruntime::TensorrtProviderFactoryCreator::Create(tensorrt_options); +#endif if (!factory) { return OrtApis::CreateStatus(ORT_FAIL, "OrtSessionOptionsAppendExecutionProvider_TensorRT: Failed to load shared library"); From 3bc8e793d6ecdf10cf27ed47e556112773c9a19a Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 19 Jan 2024 06:59:02 +0000 Subject: [PATCH 16/25] Make 'ep_cache_context' node attribute only have restrictive path for security purpose --- .../tensorrt/onnx_ctx_model_helper.cc | 170 ++++++++++-------- .../tensorrt/onnx_ctx_model_helper.h | 18 +- .../tensorrt/tensorrt_execution_provider.cc | 72 +++++--- 3 files changed, 152 insertions(+), 108 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc index 232dbfd882017..1c64df344e475 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc @@ -38,13 +38,6 @@ const onnxruntime::Path& GetModelPath(const GraphViewer& graph_viewer) { return main_graph.ModelPath(); } -std::filesystem::path LocateEngineRelativeToPath(std::string engine_cache_path, const onnxruntime::Path& path) { - std::filesystem::path base_path(path.ToPathString()); - std::filesystem::path parent_path = base_path.parent_path(); - std::filesystem::path engine_path = parent_path.append(engine_cache_path); - return engine_path; -} - /* * Update ep_cache_context attribute of the EP context node with the given engine binary data */ @@ -69,7 +62,7 @@ void UpdateCtxNodeModelEngineContext(ONNX_NAMESPACE::ModelProto* model_proto, /* * Create "EP context node" model where engine information is embedded */ -ONNX_NAMESPACE::ModelProto* CreateCtxNodeModel(const GraphViewer& graph_viewer, +ONNX_NAMESPACE::ModelProto* CreateCtxModel(const GraphViewer& graph_viewer, const std::string engine_cache_path, char* engine_data, size_t size, @@ -136,60 +129,56 @@ ONNX_NAMESPACE::ModelProto* CreateCtxNodeModel(const GraphViewer& graph_viewer, } /* - * Get "EP context node" model path - * + * Return the directory where the ep context model locates + */ +std::filesystem::path GetPathOrParentPathOfCtxModel(const std::string& ep_context_file_path) { + if (ep_context_file_path.empty()) { + return std::filesystem::path(); + } + std::filesystem::path ctx_path(ep_context_file_path); + if (std::filesystem::is_directory(ep_context_file_path)) { + return ctx_path; + } else { + return ctx_path.parent_path(); + } +} + +/* + * Get "EP context" model path. * - * If ep_context_file_path is provided: - * - If ep_context_file_path is a file: - * - If it's a file name without any path associated with it, return "engine_cache_path/ep_context_file_path". - - If it's a file name with path associated with it, return "ep_context_file_path". + * Function logic: + * If ep_context_file_path is provided, + * - If ep_context_file_path is a file, return "ep_context_file_path". * - If ep_context_file_path is a directory, return "ep_context_file_path/original_model_name_ctx.onnx". - * If ep_context_file_path is not provided: - * - Return "engine_cache_path/original_model_name_ctx.onnx". 
+ * If ep_context_file_path is not provided, + * - Return "original_model_name_ctx.onnx". * + * TRT EP has rules about context model path and engine cache path (see tensorrt_execution_provider.cc): + * - If dump_ep_context_model_ and engine_cache_enabled_ is enabled, TRT EP will dump context model and save engine cache + * to the same directory provided by ep_context_file_path_. (i.e. engine_cache_path_ = ep_context_file_path_) * * Example 1: * ep_context_file_path = "/home/user/ep_context_model_foler" - * engine_cache_path = "trt_engine.engine" * original_model_path = "model.onnx" * => return "/home/user/ep_context_model_folder/model_ctx.onnx" * * Example 2: * ep_context_file_path = "my_ctx_model.onnx" - * engine_cache_path = "/home/user/cache_folder/trt_engine.engine" * original_model_path = "model.onnx" - * => return "/home/user/cache_folder/my_ctx_model.onnx" + * => return "my_ctx_model.onnx" * * Example 3: * ep_context_file_path = "/home/user2/ep_context_model_foler/my_ctx_model.onnx" - * engine_cache_path = "trt_engine.engine" * original_model_path = "model.onnx" * => return "/home/user2/ep_context_model_foler/my_ctx_model.onnx" * - * Example 4: - * ep_context_file_path = "" - * engine_cache_path = "/home/user3/cache_folder/trt_engine.engine" - * original_model_path = "model.onnx" - * => return "/home/user3/cache_folder/model_ctx.onnx" - * */ -std::string GetCtxNodeModelPath(const std::string& ep_context_file_path, - const std::string& engine_cache_path, - const std::string& original_model_path) { +std::string GetCtxModelPath(const std::string& ep_context_file_path, + const std::string& original_model_path) { std::string ctx_model_path; if (!ep_context_file_path.empty() && !std::filesystem::is_directory(ep_context_file_path)) { - std::filesystem::path ctx_model_file_path = ep_context_file_path; - if (ctx_model_file_path.filename().string() == ep_context_file_path) { - std::filesystem::path cache_path = engine_cache_path; - if (cache_path.has_parent_path()) { - ctx_model_path = cache_path.parent_path().append(ep_context_file_path).string(); - } else { - ctx_model_path = ep_context_file_path; - } - } else { - ctx_model_path = ep_context_file_path; - } + ctx_model_path = ep_context_file_path; } else { std::filesystem::path model_path = original_model_path; std::filesystem::path model_name_stem = model_path.stem(); // model_name.onnx -> model_name @@ -199,28 +188,54 @@ std::string GetCtxNodeModelPath(const std::string& ep_context_file_path, std::filesystem::path model_directory = ep_context_file_path; ctx_model_path = model_directory.append(ctx_model_name).string(); } else { - std::filesystem::path cache_path = engine_cache_path; - if (cache_path.has_parent_path()) { - ctx_model_path = cache_path.parent_path().append(ctx_model_name).string(); - } else { - ctx_model_path = ctx_model_name; - } + ctx_model_path = ctx_model_name; } } return ctx_model_path; } /* - * Dump "EP context node" model + * Dump "EP context" model * */ -void DumpCtxNodeModel(ONNX_NAMESPACE::ModelProto* model_proto, +void DumpCtxModel(ONNX_NAMESPACE::ModelProto* model_proto, const std::string& ctx_model_path) { std::fstream dump(ctx_model_path, std::ios::out | std::ios::trunc | std::ios::binary); model_proto->SerializeToOstream(dump); LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Dumped " + ctx_model_path; } +bool IsAbsolutePath(std::string& path_string) { +#ifdef _WIN32 + onnxruntime::PathString ort_path_string = onnxruntime::ToPathString(path_string); + auto path = std::filesystem::path(ort_path_string.c_str()); + return 
path.is_absolute(); +#else + if (!path_string.empty() && path_string[0] == '/') { + return true; + } + return false; +#endif +} + +// Like "../file_path" +bool IsRelativePathToParentPath(std::string& path_string) { +#ifdef _WIN32 + onnxruntime::PathString ort_path_string = onnxruntime::ToPathString(path_string); + auto path = std::filesystem::path(ort_path_string.c_str()); + auto relative_path = path.lexically_normal().make_preferred().wstring(); + if (relative_path.find(L"..", 0) != std::string::npos) { + return true; + } + return false; +#else + if (!path_string.empty() && path_string.find("..", 0) != std::string::npos) { + return true; + } + return false; +#endif +} + Status TensorRTCacheModelHandler::GetEpContextFromGraph(const GraphViewer& graph_viewer) { if (!ValidateEPCtxNode(graph_viewer)) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "It's not a valid EP Context node"); @@ -229,8 +244,8 @@ Status TensorRTCacheModelHandler::GetEpContextFromGraph(const GraphViewer& graph auto& attrs = node->GetAttributes(); const int64_t embed_mode = attrs.at(EMBED_MODE).i(); - if (embed_mode) { - // Get engine from byte stream + if (embed_mode) { + // Get engine from byte stream. const std::string& context_binary = attrs.at(EP_CACHE_CONTEXT).s(); *(trt_engine_) = std::unique_ptr(trt_runtime_->deserializeCudaEngine(const_cast(context_binary.c_str()), static_cast(context_binary.length()))); @@ -239,20 +254,37 @@ Status TensorRTCacheModelHandler::GetEpContextFromGraph(const GraphViewer& graph return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "TensorRT EP could not deserialize engine from binary data"); } - } else { - // Get engine from cache file - std::ifstream engine_file(engine_cache_path_.string(), std::ios::binary | std::ios::in); + } else { + // Get engine from cache file. + std::string cache_path = attrs.at(EP_CACHE_CONTEXT).s(); + + // For security purpose, in the case of running context model, TRT EP won't allow + // engine cache path to be the relative path like "../file_path" or the absolute path. + // It only allows the engine cache to be in the same directory or sub directory of the context model. + if (IsAbsolutePath(cache_path)) { + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "For security purpose, the ep_cache_context attribute should be set with a relative path, but it is an absolute path: " + cache_path); + } + if (IsRelativePathToParentPath(cache_path)) { + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "The file path in ep_cache_context attribute has '..'. 
For security purpose, it's not allowed to point outside the directory."); + } + + // The engine cache and context model (current model) should be in the same directory + std::filesystem::path ctx_model_dir(GetPathOrParentPathOfCtxModel(ep_context_model_path_)); + auto engine_cache_path = ctx_model_dir.append(cache_path); + + std::ifstream engine_file(engine_cache_path.string(), std::ios::binary | std::ios::in); engine_file.seekg(0, std::ios::end); size_t engine_size = engine_file.tellg(); engine_file.seekg(0, std::ios::beg); std::unique_ptr engine_buf{new char[engine_size]}; engine_file.read((char*)engine_buf.get(), engine_size); *(trt_engine_) = std::unique_ptr(trt_runtime_->deserializeCudaEngine(engine_buf.get(), engine_size)); - LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] DeSerialized " + engine_cache_path_.string(); if (!(*trt_engine_)) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, - "TensorRT EP could not deserialize engine from cache: " + engine_cache_path_.string()); + "TensorRT EP could not deserialize engine from cache: " + engine_cache_path.string() + + ". Please make sure engine cache is inside the directory of trt_ep_context_file_path."); } + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] DeSerialized " + engine_cache_path.string(); } return Status::OK(); } @@ -277,27 +309,15 @@ bool TensorRTCacheModelHandler::ValidateEPCtxNode(const GraphViewer& graph_viewe } // "embed_mode" attr and "ep_cache_context" attr should be present - if (attrs.count(EMBED_MODE) > 0 && attrs.count(EP_CACHE_CONTEXT) > 0) { - // ep_cache_context: payload of the execution provider context if embed_mode=1, or path to the context file if embed_mode=0 - const int64_t embed_mode = attrs.at(EMBED_MODE).i(); - - // engine cache path - if (embed_mode == 0) { - // First assume engine cache path is relatvie to model path, - // If not, then assume the engine cache path is an absolute path. 
- engine_cache_path_ = LocateEngineRelativeToPath(attrs.at(EP_CACHE_CONTEXT).s(), GetModelPath(graph_viewer)); - auto default_engine_cache_path_ = engine_cache_path_; - if (!std::filesystem::exists(engine_cache_path_)) { - engine_cache_path_.assign(attrs.at(EP_CACHE_CONTEXT).s()); - if (!std::filesystem::exists(engine_cache_path_)) { - LOGS_DEFAULT(ERROR) << "Can't find " << default_engine_cache_path_.string() << " or " << engine_cache_path_.string() << " TensorRT engine"; - return false; - } - } - } else if (embed_mode == 1) { - LOGS_DEFAULT(WARNING) << EPCONTEXT_WARNING; - } + assert(attrs.count(EMBED_MODE) > 0); + assert(attrs.count(EP_CACHE_CONTEXT) > 0); + + const int64_t embed_mode = attrs.at(EMBED_MODE).i(); + if (embed_mode == 1) { + // engine binary data + LOGS_DEFAULT(WARNING) << EPCONTEXT_WARNING; } + return true; } } // namespace onnxruntime diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h index d4f53a1d532c1..8ff686c859d03 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h @@ -23,18 +23,19 @@ static const std::string EPCONTEXT_WARNING = bool GraphHasCtxNode(const GraphViewer& graph_viewer); const onnxruntime::Path& GetModelPath(const GraphViewer& graph_viewer); -std::filesystem::path LocateEngineRelativeToPath(std::string engine_cache_path, const onnxruntime::Path& path); -ONNX_NAMESPACE::ModelProto* CreateCtxNodeModel(const GraphViewer& graph_viewer, +std::filesystem::path GetPathOrParentPathOfCtxModel(const std::string& ep_context_file_path); +ONNX_NAMESPACE::ModelProto* CreateCtxModel(const GraphViewer& graph_viewer, const std::string engine_cache_path, char* engine_data, size_t size, const int64_t embed_mode, std::string compute_capability, const logging::Logger* logger); -std::string GetCtxNodeModelPath(const std::string& ep_context_file_path, - const std::string& engine_cache_path, - const std::string& original_model_path); -void DumpCtxNodeModel(ONNX_NAMESPACE::ModelProto* model_proto, +std::string GetCtxModelPath(const std::string& ep_context_file_path, + const std::string& original_model_path); +bool IsAbsolutePath(std::string& path_string); +bool IsRelativePathToParentPath(std::string& path_string); +void DumpCtxModel(ONNX_NAMESPACE::ModelProto* model_proto, const std::string& ctx_model_path); void UpdateCtxNodeModelEngineContext(ONNX_NAMESPACE::ModelProto* model_proto, char* engine_data, @@ -44,7 +45,8 @@ class TensorRTCacheModelHandler { public: TensorRTCacheModelHandler(std::unique_ptr* trt_engine, nvinfer1::IRuntime* trt_runtime, - std::string compute_capability) : trt_engine_(trt_engine), trt_runtime_(trt_runtime), compute_capability_(compute_capability) { + std::string ep_context_model_path, + std::string compute_capability) : trt_engine_(trt_engine), trt_runtime_(trt_runtime), ep_context_model_path_(ep_context_model_path), compute_capability_(compute_capability) { } ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(TensorRTCacheModelHandler); @@ -55,7 +57,7 @@ class TensorRTCacheModelHandler { private: std::unique_ptr* trt_engine_; nvinfer1::IRuntime* trt_runtime_; - std::filesystem::path engine_cache_path_; + std::string ep_context_model_path_; // If using context model, it implies context model and engine cache is in the same directory std::string compute_capability_; }; // TRTCacheModelHandler } // namespace onnxruntime diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc 
b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index a6b97a0c21d07..27df32c9a17fd 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -1348,6 +1348,9 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv timing_cache_enable_ = info.timing_cache_enable; force_timing_cache_match_ = info.force_timing_cache; detailed_build_log_ = info.detailed_build_log; + dump_ep_context_model_ = info.dump_ep_context_model; + ep_context_file_path_ = info.ep_context_file_path; + ep_context_embed_mode_ = info.ep_context_embed_mode; if (engine_cache_enable_ || int8_enable_ || timing_cache_enable_) { cache_path_ = info.engine_cache_path; cache_prefix_ = info.engine_cache_prefix; @@ -1378,9 +1381,6 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv profile_max_shapes = info.profile_max_shapes; profile_opt_shapes = info.profile_opt_shapes; cuda_graph_enable_ = info.cuda_graph_enable; - dump_ep_context_model_ = info.dump_ep_context_model; - ep_context_file_path_ = info.ep_context_file_path; - ep_context_embed_mode_ = info.ep_context_embed_mode; } else { try { const std::string max_partition_iterations_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kMaxPartitionIterations); @@ -1458,6 +1458,21 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv if (!timing_force_match_env.empty()) { force_timing_cache_match_ = (std::stoi(timing_force_match_env) == 0 ? false : true); } + + const std::string dump_ep_context_model_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kDumpEpContextModel); + if (!dump_ep_context_model_env.empty()) { + dump_ep_context_model_ = (std::stoi(dump_ep_context_model_env) == 0 ? false : true); + } + + const std::string ep_context_file_path_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kEpContextComputeCapabilityEnable); + if (!ep_context_file_path_env.empty()) { + ep_context_file_path_ = ep_context_file_path_env; + } + + const std::string ep_context_embed_mode_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kEpContextEmbedMode); + if (!ep_context_embed_mode_env.empty()) { + ep_context_embed_mode_ = std::stoi(ep_context_embed_mode_env); + } if (engine_cache_enable_ || int8_enable_ || timing_cache_enable_) { const std::string engine_cache_path = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kEngineCachePath); @@ -1536,21 +1551,6 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv cuda_graph_enable_ = (std::stoi(cuda_graph_enable_env) == 0 ? false : true); } - const std::string dump_ep_context_model_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kDumpEpContextModel); - if (!dump_ep_context_model_env.empty()) { - dump_ep_context_model_ = (std::stoi(dump_ep_context_model_env) == 0 ? 
false : true); - } - - const std::string ep_context_file_path_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kEpContextComputeCapabilityEnable); - if (!ep_context_file_path_env.empty()) { - ep_context_file_path_ = ep_context_file_path_env; - } - - const std::string ep_context_embed_mode_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kEpContextEmbedMode); - if (!ep_context_embed_mode_env.empty()) { - ep_context_embed_mode_ = std::stoi(ep_context_embed_mode_env); - } - } catch (const std::invalid_argument& ex) { LOGS_DEFAULT(WARNING) << "[TensorRT EP] Invalid Argument (from environment variables): " << ex.what(); } catch (const std::out_of_range& ex) { @@ -1655,6 +1655,28 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv } } + // If ep_context_file_path_ is provided as a directory, create it if it's not existed + if (!ep_context_file_path_.empty() && std::filesystem::path(ep_context_file_path_).extension().empty() && !std::filesystem::is_directory(ep_context_file_path_)) { + if (!std::filesystem::create_directory(ep_context_file_path_)) { + throw std::runtime_error("Failed to create directory " + ep_context_file_path_); + } + } + + // If dump_ep_context_model is enable, TRT EP forces cache_path_ to be the relative path of ep_context_file_path_. + // The cache path will be saved as the "ep_cache_context" node attritue of the EP context node. + // For security reason, it needs to make sure the engine cache is saved inside context model directory. + if (dump_ep_context_model_ && engine_cache_enable_) { + if (IsAbsolutePath(cache_path_)) { + LOGS_DEFAULT(ERROR) << "In the case of dumping context model and for security purpose, the trt_engine_cache_path should be set with a relative path, but it is an absolute path: " << cache_path_; + } + if (IsRelativePathToParentPath(cache_path_)) { + LOGS_DEFAULT(ERROR) << "In the case of dumping context model and for security purpose, The trt_engine_cache_path has '..', it's not allowed to point outside the directory."; + } + + // Make cache_path_ to be the relative path of ep_context_file_path_ + cache_path_ = GetPathOrParentPathOfCtxModel(ep_context_file_path_).append(cache_path_).string(); + } + { auto lock = GetApiLock(); runtime_ = std::unique_ptr(nvinfer1::createInferRuntime(GetTensorrtLogger())); @@ -2852,7 +2874,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView // Generate file name for dumping ep context model if (dump_ep_context_model_ && ctx_model_path_.empty()) { - ctx_model_path_ = GetCtxNodeModelPath(ep_context_file_path_, engine_cache_path, model_path_); + ctx_model_path_ = GetCtxModelPath(ep_context_file_path_, model_path_); } if (!has_dynamic_shape) { @@ -2991,14 +3013,14 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView } // dump EP context node model if (dump_ep_context_model_) { - std::unique_ptr model_proto{CreateCtxNodeModel(graph_body_viewer, + std::unique_ptr model_proto{CreateCtxModel(graph_body_viewer, engine_cache_path, reinterpret_cast(serialized_engine->data()), serialized_engine->size(), ep_context_embed_mode_, compute_capability_, GetLogger())}; - DumpCtxNodeModel(model_proto.get(), ctx_model_path_); + DumpCtxModel(model_proto.get(), ctx_model_path_); } } } @@ -3058,7 +3080,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView // TRT EP will serialize the model at inference time due to engine can be updated and the updated engine should be included in the model. 
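// [Editorial note, not part of the patch] For reference, the embed-mode-1 flow this comment describes is
// visible further down in this same patch: at inference time, once a (possibly updated) engine has been
// serialized, the context model is refreshed and re-dumped, roughly:
//   UpdateCtxNodeModelEngineContext(model_proto_.get(), engine_data, engine_size);
//   DumpCtxModel(model_proto_.get(), ctx_model_path_);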
// However, if the embed_mode is 0 (only includes engine path), TRT EP will serialize it here. if (dump_ep_context_model_ && has_dynamic_shape) { - model_proto_.reset(CreateCtxNodeModel(graph_body_viewer, + model_proto_.reset(CreateCtxModel(graph_body_viewer, engine_cache_path, nullptr, 0, @@ -3066,7 +3088,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView compute_capability_, GetLogger())); if (ep_context_embed_mode_ == 0) { - DumpCtxNodeModel(model_proto_.get(), ctx_model_path_); + DumpCtxModel(model_proto_.get(), ctx_model_path_); } } @@ -3387,7 +3409,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView // dump ep context model if (dump_ep_context_model_ && ep_context_embed_mode_) { UpdateCtxNodeModelEngineContext(model_proto_.get(), reinterpret_cast(serialized_engine->data()), serialized_engine->size()); - DumpCtxNodeModel(model_proto_.get(), ctx_model_path_); + DumpCtxModel(model_proto_.get(), ctx_model_path_); } context_update = true; } @@ -3580,7 +3602,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromPrecompiledEngine(con std::unordered_map output_types; // TRT engine output name -> ORT output tensor type // Get engine binary data and deserialize it - auto trt_cache_model_handler = TensorRTCacheModelHandler(&trt_engine, runtime_.get(), compute_capability_); + auto trt_cache_model_handler = TensorRTCacheModelHandler(&trt_engine, runtime_.get(), model_path_, compute_capability_); auto status = trt_cache_model_handler.GetEpContextFromGraph(graph_body_viewer); if (status != Status::OK()) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, status.ErrorMessage()); From cbfee7f49a38c20998c7c72cbab14a6d315d95f4 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 19 Jan 2024 07:01:01 +0000 Subject: [PATCH 17/25] lintrunner -a --- .../tensorrt/onnx_ctx_model_helper.cc | 22 ++++++++-------- .../tensorrt/onnx_ctx_model_helper.h | 16 ++++++------ .../tensorrt/tensorrt_execution_provider.cc | 26 +++++++++---------- .../core/session/provider_bridge_ort.cc | 2 -- 4 files changed, 32 insertions(+), 34 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc index 1c64df344e475..47bcdb58a8a72 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc @@ -63,12 +63,12 @@ void UpdateCtxNodeModelEngineContext(ONNX_NAMESPACE::ModelProto* model_proto, * Create "EP context node" model where engine information is embedded */ ONNX_NAMESPACE::ModelProto* CreateCtxModel(const GraphViewer& graph_viewer, - const std::string engine_cache_path, - char* engine_data, - size_t size, - const int64_t embed_mode, - std::string compute_capability, - const logging::Logger* logger) { + const std::string engine_cache_path, + char* engine_data, + size_t size, + const int64_t embed_mode, + std::string compute_capability, + const logging::Logger* logger) { auto model_build = graph_viewer.CreateModel(*logger); auto& graph_build = model_build->MainGraph(); @@ -199,7 +199,7 @@ std::string GetCtxModelPath(const std::string& ep_context_file_path, * */ void DumpCtxModel(ONNX_NAMESPACE::ModelProto* model_proto, - const std::string& ctx_model_path) { + const std::string& ctx_model_path) { std::fstream dump(ctx_model_path, std::ios::out | std::ios::trunc | std::ios::binary); model_proto->SerializeToOstream(dump); LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Dumped " + ctx_model_path; @@ -244,7 
+244,7 @@ Status TensorRTCacheModelHandler::GetEpContextFromGraph(const GraphViewer& graph auto& attrs = node->GetAttributes(); const int64_t embed_mode = attrs.at(EMBED_MODE).i(); - if (embed_mode) { + if (embed_mode) { // Get engine from byte stream. const std::string& context_binary = attrs.at(EP_CACHE_CONTEXT).s(); *(trt_engine_) = std::unique_ptr(trt_runtime_->deserializeCudaEngine(const_cast(context_binary.c_str()), @@ -254,7 +254,7 @@ Status TensorRTCacheModelHandler::GetEpContextFromGraph(const GraphViewer& graph return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "TensorRT EP could not deserialize engine from binary data"); } - } else { + } else { // Get engine from cache file. std::string cache_path = attrs.at(EP_CACHE_CONTEXT).s(); @@ -281,8 +281,8 @@ Status TensorRTCacheModelHandler::GetEpContextFromGraph(const GraphViewer& graph *(trt_engine_) = std::unique_ptr(trt_runtime_->deserializeCudaEngine(engine_buf.get(), engine_size)); if (!(*trt_engine_)) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, - "TensorRT EP could not deserialize engine from cache: " + engine_cache_path.string() + - ". Please make sure engine cache is inside the directory of trt_ep_context_file_path."); + "TensorRT EP could not deserialize engine from cache: " + engine_cache_path.string() + + ". Please make sure engine cache is inside the directory of trt_ep_context_file_path."); } LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] DeSerialized " + engine_cache_path.string(); } diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h index 8ff686c859d03..bf3bf9e3495d7 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h @@ -25,18 +25,18 @@ bool GraphHasCtxNode(const GraphViewer& graph_viewer); const onnxruntime::Path& GetModelPath(const GraphViewer& graph_viewer); std::filesystem::path GetPathOrParentPathOfCtxModel(const std::string& ep_context_file_path); ONNX_NAMESPACE::ModelProto* CreateCtxModel(const GraphViewer& graph_viewer, - const std::string engine_cache_path, - char* engine_data, - size_t size, - const int64_t embed_mode, - std::string compute_capability, - const logging::Logger* logger); + const std::string engine_cache_path, + char* engine_data, + size_t size, + const int64_t embed_mode, + std::string compute_capability, + const logging::Logger* logger); std::string GetCtxModelPath(const std::string& ep_context_file_path, const std::string& original_model_path); bool IsAbsolutePath(std::string& path_string); bool IsRelativePathToParentPath(std::string& path_string); void DumpCtxModel(ONNX_NAMESPACE::ModelProto* model_proto, - const std::string& ctx_model_path); + const std::string& ctx_model_path); void UpdateCtxNodeModelEngineContext(ONNX_NAMESPACE::ModelProto* model_proto, char* engine_data, size_t size); @@ -57,7 +57,7 @@ class TensorRTCacheModelHandler { private: std::unique_ptr* trt_engine_; nvinfer1::IRuntime* trt_runtime_; - std::string ep_context_model_path_; // If using context model, it implies context model and engine cache is in the same directory + std::string ep_context_model_path_; // If using context model, it implies context model and engine cache is in the same directory std::string compute_capability_; }; // TRTCacheModelHandler } // namespace onnxruntime diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index 
27df32c9a17fd..8150fd19baa11 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -1458,7 +1458,7 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv if (!timing_force_match_env.empty()) { force_timing_cache_match_ = (std::stoi(timing_force_match_env) == 0 ? false : true); } - + const std::string dump_ep_context_model_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kDumpEpContextModel); if (!dump_ep_context_model_env.empty()) { dump_ep_context_model_ = (std::stoi(dump_ep_context_model_env) == 0 ? false : true); @@ -3014,12 +3014,12 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView // dump EP context node model if (dump_ep_context_model_) { std::unique_ptr model_proto{CreateCtxModel(graph_body_viewer, - engine_cache_path, - reinterpret_cast(serialized_engine->data()), - serialized_engine->size(), - ep_context_embed_mode_, - compute_capability_, - GetLogger())}; + engine_cache_path, + reinterpret_cast(serialized_engine->data()), + serialized_engine->size(), + ep_context_embed_mode_, + compute_capability_, + GetLogger())}; DumpCtxModel(model_proto.get(), ctx_model_path_); } } @@ -3081,12 +3081,12 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView // However, if the embed_mode is 0 (only includes engine path), TRT EP will serialize it here. if (dump_ep_context_model_ && has_dynamic_shape) { model_proto_.reset(CreateCtxModel(graph_body_viewer, - engine_cache_path, - nullptr, - 0, - ep_context_embed_mode_, - compute_capability_, - GetLogger())); + engine_cache_path, + nullptr, + 0, + ep_context_embed_mode_, + compute_capability_, + GetLogger())); if (ep_context_embed_mode_ == 0) { DumpCtxModel(model_proto_.get(), ctx_model_path_); } diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 1b3b7de7d89c5..3269c9f0f4e4b 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1759,8 +1759,6 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT, _In factory = onnxruntime::TensorrtProviderFactoryCreator::Create(tensorrt_options); #endif - - if (!factory) { return OrtApis::CreateStatus(ORT_FAIL, "SessionOptionsAppendExecutionProvider_Tensorrt: Failed to load shared library"); } From d0e7a488fde4ef9f16d076eefe37d8b95f672d78 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 19 Jan 2024 17:02:15 +0000 Subject: [PATCH 18/25] update --- .../tensorrt/onnx_ctx_model_helper.cc | 6 +-- .../tensorrt/tensorrt_execution_provider.cc | 47 ++++++++++--------- .../providers/tensorrt/tensorrt_basic_test.cc | 27 ++++++----- 3 files changed, 44 insertions(+), 36 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc index 47bcdb58a8a72..0f659c91ed800 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc @@ -158,7 +158,7 @@ std::filesystem::path GetPathOrParentPathOfCtxModel(const std::string& ep_contex * to the same directory provided by ep_context_file_path_. (i.e. 
engine_cache_path_ = ep_context_file_path_) * * Example 1: - * ep_context_file_path = "/home/user/ep_context_model_foler" + * ep_context_file_path = "/home/user/ep_context_model_directory" * original_model_path = "model.onnx" * => return "/home/user/ep_context_model_folder/model_ctx.onnx" * @@ -168,9 +168,9 @@ std::filesystem::path GetPathOrParentPathOfCtxModel(const std::string& ep_contex * => return "my_ctx_model.onnx" * * Example 3: - * ep_context_file_path = "/home/user2/ep_context_model_foler/my_ctx_model.onnx" + * ep_context_file_path = "/home/user2/ep_context_model_directory/my_ctx_model.onnx" * original_model_path = "model.onnx" - * => return "/home/user2/ep_context_model_foler/my_ctx_model.onnx" + * => return "/home/user2/ep_context_model_directory/my_ctx_model.onnx" * */ std::string GetCtxModelPath(const std::string& ep_context_file_path, diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index 8150fd19baa11..b8be0c0b0f766 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -1578,6 +1578,31 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv dla_core_ = 0; } + // If dump_ep_context_model_ is enable, TRT EP forces cache_path_ to be the relative path of ep_context_file_path_. + // For example, + // - original cache path = "engine_cache_dir" -> new cache path = "./context_model_dir/engine_cache_dir" + // - original cache path = "" -> new cache path = "./context_model_dir" + // The new cache path will be saved as the "ep_cache_context" node attritue of the EP context node. + // For security reason, it needs to make sure the engine cache is saved inside context model directory. 
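// [Editorial worked example, not part of the patch] With the helpers added in onnx_ctx_model_helper.cc,
// the checks below behave roughly like this (Linux-style paths, values hypothetical):
//   IsAbsolutePath("/opt/trt_caches")            -> true   (rejected: absolute path)
//   IsRelativePathToParentPath("../trt_caches")  -> true   (rejected: escapes the context model directory)
//   cache_path_ = "engine_dir", ep_context_file_path_ = "ctx_dir"
//     -> cache_path_ becomes "ctx_dir/engine_dir", i.e. the engine cache stays inside the context model directory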
+ if (dump_ep_context_model_ && engine_cache_enable_) { + if (IsAbsolutePath(cache_path_)) { + LOGS_DEFAULT(ERROR) << "In the case of dumping context model and for security purpose, the trt_engine_cache_path should be set with a relative path, but it is an absolute path: " << cache_path_; + } + if (IsRelativePathToParentPath(cache_path_)) { + LOGS_DEFAULT(ERROR) << "In the case of dumping context model and for security purpose, The trt_engine_cache_path has '..', it's not allowed to point outside the directory."; + } + + // Make cache_path_ to be the relative path of ep_context_file_path_ + cache_path_ = GetPathOrParentPathOfCtxModel(ep_context_file_path_).append(cache_path_).string(); + } + + // If ep_context_file_path_ is provided as a directory, create it if it's not existed + if (!ep_context_file_path_.empty() && std::filesystem::path(ep_context_file_path_).extension().empty() && !std::filesystem::is_directory(ep_context_file_path_)) { + if (!std::filesystem::create_directory(ep_context_file_path_)) { + throw std::runtime_error("Failed to create directory " + ep_context_file_path_); + } + } + if (engine_cache_enable_ || int8_enable_ || timing_cache_enable_) { if (!cache_path_.empty() && !fs::is_directory(cache_path_)) { if (!fs::create_directory(cache_path_)) { @@ -1655,28 +1680,6 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv } } - // If ep_context_file_path_ is provided as a directory, create it if it's not existed - if (!ep_context_file_path_.empty() && std::filesystem::path(ep_context_file_path_).extension().empty() && !std::filesystem::is_directory(ep_context_file_path_)) { - if (!std::filesystem::create_directory(ep_context_file_path_)) { - throw std::runtime_error("Failed to create directory " + ep_context_file_path_); - } - } - - // If dump_ep_context_model is enable, TRT EP forces cache_path_ to be the relative path of ep_context_file_path_. - // The cache path will be saved as the "ep_cache_context" node attritue of the EP context node. - // For security reason, it needs to make sure the engine cache is saved inside context model directory. 
- if (dump_ep_context_model_ && engine_cache_enable_) { - if (IsAbsolutePath(cache_path_)) { - LOGS_DEFAULT(ERROR) << "In the case of dumping context model and for security purpose, the trt_engine_cache_path should be set with a relative path, but it is an absolute path: " << cache_path_; - } - if (IsRelativePathToParentPath(cache_path_)) { - LOGS_DEFAULT(ERROR) << "In the case of dumping context model and for security purpose, The trt_engine_cache_path has '..', it's not allowed to point outside the directory."; - } - - // Make cache_path_ to be the relative path of ep_context_file_path_ - cache_path_ = GetPathOrParentPathOfCtxModel(ep_context_file_path_).append(cache_path_).string(); - } - { auto lock = GetApiLock(); runtime_ = std::unique_ptr(nvinfer1::createInferRuntime(GetTensorrtLogger())); diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index a1d19ecbabdcf..225c920326470 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -394,7 +394,7 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) { std::vector expected_dims_mul_m = {1, 3, 2}; std::vector expected_values_mul_m = {3.0f, 6.0f, 9.0f, 12.0f, 15.0f, 18.0f}; - // Test dumping EP context model to provided path + // Dump context model with specific name OrtTensorRTProviderOptionsV2 params; params.trt_engine_cache_enable = 1; params.trt_dump_ep_context_model = 1; @@ -405,29 +405,34 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) { ASSERT_TRUE(status.IsOK()); status = session_object.Initialize(); ASSERT_TRUE(status.IsOK()); - // "EP_Context_model.onnx" should be created - ASSERT_TRUE(HasCacheFileWithPrefix(params.trt_ep_context_file_path)); + ASSERT_TRUE(HasCacheFileWithPrefix(params.trt_ep_context_file_path)); // "EP_Context_model.onnx" should be created - // Test dumping EP context model to provided path + // Dump context model to specific path InferenceSession session_object2{so, GetEnvironment()}; OrtTensorRTProviderOptionsV2 params2; params2.trt_engine_cache_enable = 1; params2.trt_dump_ep_context_model = 1; - params2.trt_engine_cache_path = "./trt_engine_cache"; - params2.trt_ep_context_file_path = "EP_Context_model.onnx"; + params2.trt_engine_cache_prefix = "TRT_engine_cache"; + params2.trt_engine_cache_path = "engine_cache_folder"; // due to dump_ep_context_model = 1, the new cache path is ./context_model_folder/engine_cache_folder + params2.trt_ep_context_file_path = "./context_model_folder"; execution_provider = TensorrtExecutionProviderWithOptions(¶ms2); EXPECT_TRUE(session_object2.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); status = session_object2.Load(model_name); ASSERT_TRUE(status.IsOK()); status = session_object2.Initialize(); ASSERT_TRUE(status.IsOK()); - // "./trt_engine_cache/EP_Context_model.onnx" should be created - ASSERT_TRUE(HasCacheFileWithPrefix(params2.trt_ep_context_file_path, params2.trt_engine_cache_path)); - - // Test EP context model inference + auto new_engine_cache_path = std::filesystem::path(params2.trt_ep_context_file_path).append(params2.trt_engine_cache_path).string(); + // Test engine cache path: + // "./context_model_folder/engine_cache_folder/TRT_engine_cache...engine" should be created + ASSERT_TRUE(HasCacheFileWithPrefix(params2.trt_engine_cache_prefix, new_engine_cache_path)); + // Test context model path: + // "./context_model_folder/EPContextNode_test_ctx.onnx" should be created + 
ASSERT_TRUE(HasCacheFileWithPrefix("EPContextNode_test_ctx.onnx", params2.trt_ep_context_file_path)); + + // Context model inference InferenceSession session_object3{so, GetEnvironment()}; OrtTensorRTProviderOptionsV2 params3; - model_name = "EP_Context_model.onnx"; + model_name = params.trt_ep_context_file_path; execution_provider = TensorrtExecutionProviderWithOptions(¶ms3); EXPECT_TRUE(session_object3.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); status = session_object3.Load(model_name); From 14748c052e9d4bfca7b35cd5165193948eec3602 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 19 Jan 2024 17:22:50 +0000 Subject: [PATCH 19/25] add comment --- .../tensorrt/tensorrt_provider_options.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h index 1d9af3f18d184..dc782fd54f1c1 100644 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h @@ -48,8 +48,26 @@ struct OrtTensorRTProviderOptionsV2 { const char* trt_profile_max_shapes{nullptr}; // Specify the range of the input shapes to build the engine with const char* trt_profile_opt_shapes{nullptr}; // Specify the range of the input shapes to build the engine with int trt_cuda_graph_enable{0}; // Enable CUDA graph in ORT TRT + + /* + * Please note that there are rules for using following context model related provider options: + * + * 1. In the case of dumping the context model and loading the context model, + * for security reason, TRT EP doesn't allow the "ep_cache_context" node attribute of EP context node to be + * the absolute path or relative path that is outside of context model directory. + * It means engine cache needs to be in the same directory or sub-directory of context model. + * + * 2. In the case of dumping the context model, the engine cache path will be changed to the relative path of context model directory. + * For example: + * If "trt_dump_ep_context_model" is enabled and "trt_engine_cache_enable" is enabled, + * if "trt_ep_context_file_path" is "./context_model_dir", + * - if "trt_engine_cache_path" is "" -> the engine cache will be saved to "./context_model_dir" + * - if "trt_engine_cache_path" is "engine_dir" -> the engine cache will be saved to "./context_model_dir/engine_dir" + * + */ int trt_dump_ep_context_model{0}; // Dump EP context node model const char* trt_ep_context_file_path{nullptr}; // Specify file name to dump EP context node model. int trt_ep_context_embed_mode{0}; // Specify EP context embed mode. 
Default 0 = context is engine cache path, 1 = context is engine binary data + const char* trt_engine_cache_prefix{nullptr}; // specify engine cache prefix }; From 34747accd0c251fe4c7242ae498771f7cab88c3e Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 19 Jan 2024 17:24:12 +0000 Subject: [PATCH 20/25] lintrunner -a --- .../tensorrt/tensorrt_provider_options.h | 24 +++++++++---------- .../tensorrt/tensorrt_execution_provider.cc | 2 +- .../providers/tensorrt/tensorrt_basic_test.cc | 6 ++--- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h index dc782fd54f1c1..0e0c184934582 100644 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h @@ -48,26 +48,26 @@ struct OrtTensorRTProviderOptionsV2 { const char* trt_profile_max_shapes{nullptr}; // Specify the range of the input shapes to build the engine with const char* trt_profile_opt_shapes{nullptr}; // Specify the range of the input shapes to build the engine with int trt_cuda_graph_enable{0}; // Enable CUDA graph in ORT TRT - + /* * Please note that there are rules for using following context model related provider options: - * - * 1. In the case of dumping the context model and loading the context model, + * + * 1. In the case of dumping the context model and loading the context model, * for security reason, TRT EP doesn't allow the "ep_cache_context" node attribute of EP context node to be * the absolute path or relative path that is outside of context model directory. * It means engine cache needs to be in the same directory or sub-directory of context model. - * - * 2. In the case of dumping the context model, the engine cache path will be changed to the relative path of context model directory. - * For example: + * + * 2. In the case of dumping the context model, the engine cache path will be changed to the relative path of context model directory. + * For example: * If "trt_dump_ep_context_model" is enabled and "trt_engine_cache_enable" is enabled, * if "trt_ep_context_file_path" is "./context_model_dir", * - if "trt_engine_cache_path" is "" -> the engine cache will be saved to "./context_model_dir" * - if "trt_engine_cache_path" is "engine_dir" -> the engine cache will be saved to "./context_model_dir/engine_dir" - * + * */ - int trt_dump_ep_context_model{0}; // Dump EP context node model - const char* trt_ep_context_file_path{nullptr}; // Specify file name to dump EP context node model. - int trt_ep_context_embed_mode{0}; // Specify EP context embed mode. Default 0 = context is engine cache path, 1 = context is engine binary data - - const char* trt_engine_cache_prefix{nullptr}; // specify engine cache prefix + int trt_dump_ep_context_model{0}; // Dump EP context node model + const char* trt_ep_context_file_path{nullptr}; // Specify file name to dump EP context node model. + int trt_ep_context_embed_mode{0}; // Specify EP context embed mode. 
Default 0 = context is engine cache path, 1 = context is engine binary data + + const char* trt_engine_cache_prefix{nullptr}; // specify engine cache prefix }; diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index b8be0c0b0f766..ec36660a7e6d6 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -1579,7 +1579,7 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv } // If dump_ep_context_model_ is enable, TRT EP forces cache_path_ to be the relative path of ep_context_file_path_. - // For example, + // For example, // - original cache path = "engine_cache_dir" -> new cache path = "./context_model_dir/engine_cache_dir" // - original cache path = "" -> new cache path = "./context_model_dir" // The new cache path will be saved as the "ep_cache_context" node attritue of the EP context node. diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index 225c920326470..048a4de1685cd 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -405,7 +405,7 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) { ASSERT_TRUE(status.IsOK()); status = session_object.Initialize(); ASSERT_TRUE(status.IsOK()); - ASSERT_TRUE(HasCacheFileWithPrefix(params.trt_ep_context_file_path)); // "EP_Context_model.onnx" should be created + ASSERT_TRUE(HasCacheFileWithPrefix(params.trt_ep_context_file_path)); // "EP_Context_model.onnx" should be created // Dump context model to specific path InferenceSession session_object2{so, GetEnvironment()}; @@ -413,7 +413,7 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) { params2.trt_engine_cache_enable = 1; params2.trt_dump_ep_context_model = 1; params2.trt_engine_cache_prefix = "TRT_engine_cache"; - params2.trt_engine_cache_path = "engine_cache_folder"; // due to dump_ep_context_model = 1, the new cache path is ./context_model_folder/engine_cache_folder + params2.trt_engine_cache_path = "engine_cache_folder"; // due to dump_ep_context_model = 1, the new cache path is ./context_model_folder/engine_cache_folder params2.trt_ep_context_file_path = "./context_model_folder"; execution_provider = TensorrtExecutionProviderWithOptions(¶ms2); EXPECT_TRUE(session_object2.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); @@ -428,7 +428,7 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) { // Test context model path: // "./context_model_folder/EPContextNode_test_ctx.onnx" should be created ASSERT_TRUE(HasCacheFileWithPrefix("EPContextNode_test_ctx.onnx", params2.trt_ep_context_file_path)); - + // Context model inference InferenceSession session_object3{so, GetEnvironment()}; OrtTensorRTProviderOptionsV2 params3; From 22045cc4892453da125c75c4d83290a8e844a94c Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 19 Jan 2024 22:38:24 +0000 Subject: [PATCH 21/25] add unit test --- .../tensorrt/tensorrt_provider_options.h | 2 +- .../tensorrt/onnx_ctx_model_helper.cc | 9 ++- .../tensorrt/tensorrt_execution_provider.cc | 44 +++++++----- .../tensorrt/tensorrt_execution_provider.h | 3 +- .../providers/tensorrt/tensorrt_basic_test.cc | 67 +++++++++++++++++-- 5 files changed, 99 insertions(+), 26 deletions(-) diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h 
b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h index 0e0c184934582..32a9f06464ace 100644 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h @@ -66,7 +66,7 @@ struct OrtTensorRTProviderOptionsV2 { * */ int trt_dump_ep_context_model{0}; // Dump EP context node model - const char* trt_ep_context_file_path{nullptr}; // Specify file name to dump EP context node model. + const char* trt_ep_context_file_path{nullptr}; // Specify file name to dump EP context node model. Can be a path or a file name or a file name with path. int trt_ep_context_embed_mode{0}; // Specify EP context embed mode. Default 0 = context is engine cache path, 1 = context is engine binary data const char* trt_engine_cache_prefix{nullptr}; // specify engine cache prefix diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc index 0f659c91ed800..1994d1f5ab0b8 100644 --- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc @@ -272,6 +272,12 @@ Status TensorRTCacheModelHandler::GetEpContextFromGraph(const GraphViewer& graph std::filesystem::path ctx_model_dir(GetPathOrParentPathOfCtxModel(ep_context_model_path_)); auto engine_cache_path = ctx_model_dir.append(cache_path); + if (!std::filesystem::exists(engine_cache_path)) { + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, + "TensorRT EP can't find engine cache: " + engine_cache_path.string() + + ". Please make sure engine cache is in the same directory or sub-directory of context model."); + } + std::ifstream engine_file(engine_cache_path.string(), std::ios::binary | std::ios::in); engine_file.seekg(0, std::ios::end); size_t engine_size = engine_file.tellg(); @@ -281,8 +287,7 @@ Status TensorRTCacheModelHandler::GetEpContextFromGraph(const GraphViewer& graph *(trt_engine_) = std::unique_ptr(trt_runtime_->deserializeCudaEngine(engine_buf.get(), engine_size)); if (!(*trt_engine_)) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, - "TensorRT EP could not deserialize engine from cache: " + engine_cache_path.string() + - ". Please make sure engine cache is inside the directory of trt_ep_context_file_path."); + "TensorRT EP could not deserialize engine from cache: " + engine_cache_path.string()); } LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] DeSerialized " + engine_cache_path.string(); } diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index ec36660a7e6d6..23417e668f34a 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -1578,6 +1578,13 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv dla_core_ = 0; } + // If ep_context_file_path_ is provided as a directory, create it if it's not existed + if (dump_ep_context_model_ && !ep_context_file_path_.empty() && std::filesystem::path(ep_context_file_path_).extension().empty() && !std::filesystem::is_directory(ep_context_file_path_)) { + if (!std::filesystem::create_directory(ep_context_file_path_)) { + throw std::runtime_error("Failed to create directory " + ep_context_file_path_); + } + } + // If dump_ep_context_model_ is enable, TRT EP forces cache_path_ to be the relative path of ep_context_file_path_. 
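// [Editorial note, not part of the patch] "Relative" here means cache_path_ is re-rooted under the context
// model directory a few lines below, roughly:
//   cache_path_ = GetPathOrParentPathOfCtxModel(ep_context_file_path_).append(cache_path_).string();
// so the engine cache cannot end up outside the dumped context model's directory.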
// For example, // - original cache path = "engine_cache_dir" -> new cache path = "./context_model_dir/engine_cache_dir" @@ -1596,13 +1603,6 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv cache_path_ = GetPathOrParentPathOfCtxModel(ep_context_file_path_).append(cache_path_).string(); } - // If ep_context_file_path_ is provided as a directory, create it if it's not existed - if (!ep_context_file_path_.empty() && std::filesystem::path(ep_context_file_path_).extension().empty() && !std::filesystem::is_directory(ep_context_file_path_)) { - if (!std::filesystem::create_directory(ep_context_file_path_)) { - throw std::runtime_error("Failed to create directory " + ep_context_file_path_); - } - } - if (engine_cache_enable_ || int8_enable_ || timing_cache_enable_) { if (!cache_path_.empty() && !fs::is_directory(cache_path_)) { if (!fs::create_directory(cache_path_)) { @@ -2335,6 +2335,14 @@ TensorrtExecutionProvider::GetCapability(const GraphViewer& graph, // Construct subgraph capability from node list std::vector> result; + // Get ModelPath + const auto& path_string = graph.ModelPath().ToPathString(); +#ifdef _WIN32 + wcstombs_s(nullptr, model_path_, sizeof(model_path_), path_string.c_str(), sizeof(model_path_)); +#else + strcpy(model_path_, path_string.c_str()); +#endif + // If the model consists of only a single "EPContext" contrib op, it means TRT EP can fetch the precompiled engine info from the node and // load the engine directly without having to go through the processes of graph proto reconstruction, calling TRT parser and engine compilation. // So, simply return the ComputeCapability here. @@ -2345,14 +2353,6 @@ TensorrtExecutionProvider::GetCapability(const GraphViewer& graph, return result; } - // Get ModelPath - const auto& path_string = graph.ModelPath().ToPathString(); -#ifdef _WIN32 - wcstombs_s(nullptr, model_path_, sizeof(model_path_), path_string.c_str(), sizeof(model_path_)); -#else - strcpy(model_path_, path_string.c_str()); -#endif - // Generate unique kernel name for TRT graph HashValue model_hash = TRTGenerateId(graph); @@ -3016,8 +3016,14 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView } // dump EP context node model if (dump_ep_context_model_) { + + // "ep_cache_context" node attribute should be a relative path to context model directory + if (ep_cache_context_attr_.empty()) { + ep_cache_context_attr_ = std::filesystem::relative(engine_cache_path, ep_context_file_path_).string(); + } + std::unique_ptr model_proto{CreateCtxModel(graph_body_viewer, - engine_cache_path, + ep_cache_context_attr_, reinterpret_cast(serialized_engine->data()), serialized_engine->size(), ep_context_embed_mode_, @@ -3083,8 +3089,12 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView // TRT EP will serialize the model at inference time due to engine can be updated and the updated engine should be included in the model. // However, if the embed_mode is 0 (only includes engine path), TRT EP will serialize it here. 
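// [Editorial worked example, not part of the patch] std::filesystem::relative() used just below is what
// keeps the dumped "ep_cache_context" attribute relative to the context model directory, e.g. with
// hypothetical paths:
//   std::filesystem::relative("context_model_folder/engine_cache_folder/TRT_engine_cache_sm80.engine",
//                             "context_model_folder")
//   -> "engine_cache_folder/TRT_engine_cache_sm80.engine"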
if (dump_ep_context_model_ && has_dynamic_shape) { + // "ep_cache_context" node attribute should be a relative path to context model directory + if (ep_cache_context_attr_.empty()) { + ep_cache_context_attr_ = std::filesystem::relative(engine_cache_path, ep_context_file_path_).string(); + } model_proto_.reset(CreateCtxModel(graph_body_viewer, - engine_cache_path, + ep_cache_context_attr_, nullptr, 0, ep_context_embed_mode_, diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h index 86645fabd36d9..70b71aa221eef 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h @@ -293,7 +293,6 @@ class TensorrtExecutionProvider : public IExecutionProvider { bool force_timing_cache_match_ = false; bool detailed_build_log_ = false; bool cuda_graph_enable_ = false; - std::string ctx_model_path_; std::string cache_prefix_; // The OrtAllocator object will be get during ep compute time @@ -304,6 +303,8 @@ class TensorrtExecutionProvider : public IExecutionProvider { bool dump_ep_context_model_ = false; std::string ep_context_file_path_; int ep_context_embed_mode_ = 0; + std::string ctx_model_path_; + std::string ep_cache_context_attr_; std::unique_ptr model_proto_ = ONNX_NAMESPACE::ModelProto::Create(); std::unordered_set control_flow_op_set_ = {"If", "Loop", "Scan"}; diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index 048a4de1685cd..69834b934b0f8 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -394,7 +394,16 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) { std::vector expected_dims_mul_m = {1, 3, 2}; std::vector expected_values_mul_m = {3.0f, 6.0f, 9.0f, 12.0f, 15.0f, 18.0f}; - // Dump context model with specific name + /* + * Test case 1: Dump context model + * + * provider options=> + * trt_ep_context_file_path = "EP_Context_model.onnx" + * + * expected result => + * context model "EP_Context_model.onnx" should be created in current directory + * + */ OrtTensorRTProviderOptionsV2 params; params.trt_engine_cache_enable = 1; params.trt_dump_ep_context_model = 1; @@ -405,16 +414,27 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) { ASSERT_TRUE(status.IsOK()); status = session_object.Initialize(); ASSERT_TRUE(status.IsOK()); - ASSERT_TRUE(HasCacheFileWithPrefix(params.trt_ep_context_file_path)); // "EP_Context_model.onnx" should be created + ASSERT_TRUE(HasCacheFileWithPrefix(params.trt_ep_context_file_path)); - // Dump context model to specific path + /* + * Test case 2: Dump context model + * + * provider options=> + * trt_engine_cache_prefix = "TRT_engine_cache" + * trt_ep_context_file_path = "context_model_folder" + * trt_engine_cache_path = "engine_cache_folder" + * + * expected result => + * engine cache "./context_model_folder/engine_cache_folder/TRT_engine_cache...engine" should be created + * context model "./context_model_folder/EPContextNode_test_ctx.onnx" should be created + */ InferenceSession session_object2{so, GetEnvironment()}; OrtTensorRTProviderOptionsV2 params2; params2.trt_engine_cache_enable = 1; params2.trt_dump_ep_context_model = 1; params2.trt_engine_cache_prefix = "TRT_engine_cache"; params2.trt_engine_cache_path = "engine_cache_folder"; // due to dump_ep_context_model = 1, the new cache path is 
./context_model_folder/engine_cache_folder - params2.trt_ep_context_file_path = "./context_model_folder"; + params2.trt_ep_context_file_path = "context_model_folder"; execution_provider = TensorrtExecutionProviderWithOptions(¶ms2); EXPECT_TRUE(session_object2.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); status = session_object2.Load(model_name); @@ -429,7 +449,16 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) { // "./context_model_folder/EPContextNode_test_ctx.onnx" should be created ASSERT_TRUE(HasCacheFileWithPrefix("EPContextNode_test_ctx.onnx", params2.trt_ep_context_file_path)); - // Context model inference + /* + * Test case 3: Run the dumped context model + * + * context model path = "./EP_Context_model.onnx" (created from case 1) + * + * expected result=> + * engine cache is also in the same current dirctory as "./xxxxx.engine" + * and the "ep_cache_context" attribute node of the context model should point to that. + * + */ InferenceSession session_object3{so, GetEnvironment()}; OrtTensorRTProviderOptionsV2 params3; model_name = params.trt_ep_context_file_path; @@ -447,6 +476,34 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) { // Y: 1, 3, 3, 2, 2, 2 // Z: 1, 3, 3, 2, 2, 2 RunSession(session_object3, run_options, feeds, output_names, expected_dims_mul_m, expected_values_mul_m); + + /* + * Test case 4: Run the dumped context model + * + * context model path = "./context_model_folder/EPContextNode_test_ctx.onnx" (created from case 2) + * + * expected result=> + * engine cache path is "./context_model_folder/engine_cache_folder/xxxxx.engine" + * and the "ep_cache_context" attribute node of the context model should point to that. + * + */ + InferenceSession session_object4{so, GetEnvironment()}; + OrtTensorRTProviderOptionsV2 params4; + model_name = "./context_model_folder/EPContextNode_test_ctx.onnx"; + execution_provider = TensorrtExecutionProviderWithOptions(¶ms4); + EXPECT_TRUE(session_object4.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); + status = session_object4.Load(model_name); + ASSERT_TRUE(status.IsOK()); + status = session_object4.Initialize(); + ASSERT_TRUE(status.IsOK()); + // run inference + // TRT engine will be created and cached + // TRT profile will be created and cached only for dynamic input shape + // Data in profile, + // X: 1, 3, 3, 2, 2, 2 + // Y: 1, 3, 3, 2, 2, 2 + // Z: 1, 3, 3, 2, 2, 2 + RunSession(session_object4, run_options, feeds, output_names, expected_dims_mul_m, expected_values_mul_m); } TEST(TensorrtExecutionProviderTest, TRTPluginsCustomOpTest) { From 4f78c47859b50c14dd742968bf33545d6f50d264 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 19 Jan 2024 22:39:34 +0000 Subject: [PATCH 22/25] lintrunner -a --- .../tensorrt/tensorrt_execution_provider.cc | 1 - .../providers/tensorrt/tensorrt_basic_test.cc | 18 +++++++++--------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index 23417e668f34a..f95009eb8643d 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -3016,7 +3016,6 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView } // dump EP context node model if (dump_ep_context_model_) { - // "ep_cache_context" node attribute should be a relative path to context model directory if (ep_cache_context_attr_.empty()) { 
      ep_cache_context_attr_ = std::filesystem::relative(engine_cache_path, ep_context_file_path_).string();
diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
index 69834b934b0f8..b748f3f079ba4 100644
--- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
+++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
@@ -394,16 +394,16 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) {
   std::vector<int64_t> expected_dims_mul_m = {1, 3, 2};
   std::vector<float> expected_values_mul_m = {3.0f, 6.0f, 9.0f, 12.0f, 15.0f, 18.0f};
 
-  /* 
+  /*
    * Test case 1: Dump context model
-   * 
+   *
    * provider options=>
    * trt_ep_context_file_path = "EP_Context_model.onnx"
-   * 
+   *
    * expected result =>
    * context model "EP_Context_model.onnx" should be created in current directory
-   * 
-   */ 
+   *
+   */
   OrtTensorRTProviderOptionsV2 params;
   params.trt_engine_cache_enable = 1;
   params.trt_dump_ep_context_model = 1;
@@ -423,11 +423,11 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) {
    * trt_engine_cache_prefix = "TRT_engine_cache"
    * trt_ep_context_file_path = "context_model_folder"
    * trt_engine_cache_path = "engine_cache_folder"
-   * 
+   *
    * expected result =>
    * engine cache "./context_model_folder/engine_cache_folder/TRT_engine_cache...engine" should be created
    * context model "./context_model_folder/EPContextNode_test_ctx.onnx" should be created
-   */ 
+   */
   InferenceSession session_object2{so, GetEnvironment()};
   OrtTensorRTProviderOptionsV2 params2;
   params2.trt_engine_cache_enable = 1;
@@ -453,7 +453,7 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) {
    * Test case 3: Run the dumped context model
    *
    * context model path = "./EP_Context_model.onnx" (created from case 1)
-   * 
+   *
    * expected result=>
    * engine cache is also in the same current directory as "./xxxxx.engine"
    * and the "ep_cache_context" attribute node of the context model should point to that.
@@ -481,7 +481,7 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) {
    * Test case 4: Run the dumped context model
    *
    * context model path = "./context_model_folder/EPContextNode_test_ctx.onnx" (created from case 2)
-   * 
+   *
    * expected result=>
    * engine cache path is "./context_model_folder/engine_cache_folder/xxxxx.engine"
    * and the "ep_cache_context" attribute node of the context model should point to that.
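For readers following the EPContextNode test cases above, the same dump-then-load flow looks roughly like this from an application's point of view. This is a minimal sketch, not code from the PR: it assumes the OrtTensorRTProviderOptionsV2 struct definition and the C++ AppendExecutionProvider_TensorRT_V2 helper are visible to the application (as they are to the unit tests), and the model and file names are placeholders borrowed from the tests.

```cpp
#include <onnxruntime_cxx_api.h>

int main() {
  Ort::Env env;

  // Step 1: compile the original model once and dump an EP context model
  // (mirrors test case 1: trt_ep_context_file_path names the output file).
  OrtTensorRTProviderOptionsV2 dump_opts{};
  dump_opts.trt_engine_cache_enable = 1;
  dump_opts.trt_dump_ep_context_model = 1;
  dump_opts.trt_ep_context_file_path = "EP_Context_model.onnx";  // placeholder name

  Ort::SessionOptions so_dump;
  so_dump.AppendExecutionProvider_TensorRT_V2(dump_opts);
  Ort::Session dump_session(env, ORT_TSTR("EPContextNode_test.onnx"), so_dump);  // placeholder model

  // Step 2: later runs load the dumped context model directly; its
  // "ep_cache_context" node attribute locates the engine cache relative
  // to the context model (mirrors test case 3).
  OrtTensorRTProviderOptionsV2 run_opts{};
  run_opts.trt_engine_cache_enable = 1;

  Ort::SessionOptions so_run;
  so_run.AppendExecutionProvider_TensorRT_V2(run_opts);
  Ort::Session run_session(env, ORT_TSTR("EP_Context_model.onnx"), so_run);
  return 0;
}
```
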
From da1207f4018204091b140592df24a00a66982528 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Sat, 20 Jan 2024 00:19:51 +0000
Subject: [PATCH 23/25] fix bug for unit test

---
 .../providers/tensorrt/tensorrt_basic_test.cc | 38 +++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
index b748f3f079ba4..73e0cf59d198c 100644
--- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
+++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
@@ -462,6 +462,7 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) {
   InferenceSession session_object3{so, GetEnvironment()};
   OrtTensorRTProviderOptionsV2 params3;
   model_name = params.trt_ep_context_file_path;
+  params3.trt_engine_cache_enable = 1;
   execution_provider = TensorrtExecutionProviderWithOptions(&params3);
   EXPECT_TRUE(session_object3.RegisterExecutionProvider(std::move(execution_provider)).IsOK());
   status = session_object3.Load(model_name);
@@ -504,6 +505,43 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) {
   // Y: 1, 3, 3, 2, 2, 2
   // Z: 1, 3, 3, 2, 2, 2
   RunSession(session_object4, run_options, feeds, output_names, expected_dims_mul_m, expected_values_mul_m);
+
+  /*
+   * Test case 5: Dump context model with embed_mode = 1
+   */
+  InferenceSession session_object5{so, GetEnvironment()};
+  OrtTensorRTProviderOptionsV2 params5;
+  params5.trt_dump_ep_context_model = 1;
+  params5.trt_ep_context_embed_mode = 1;
+  params5.trt_ep_context_file_path = "EP_Context_model_2.onnx";
+  model_name = "EPContextNode_test.onnx";
+  execution_provider = TensorrtExecutionProviderWithOptions(&params5);
+  EXPECT_TRUE(session_object5.RegisterExecutionProvider(std::move(execution_provider)).IsOK());
+  status = session_object5.Load(model_name);
+  ASSERT_TRUE(status.IsOK());
+  status = session_object5.Initialize();
+  ASSERT_TRUE(status.IsOK());
+
+  /*
+   * Test case 6: Run context model with embed_mode = 1 (created from case 5)
+   */
+  InferenceSession session_object6{so, GetEnvironment()};
+  OrtTensorRTProviderOptionsV2 params6;
+  model_name = params5.trt_ep_context_file_path;
+  execution_provider = TensorrtExecutionProviderWithOptions(&params6);
+  EXPECT_TRUE(session_object6.RegisterExecutionProvider(std::move(execution_provider)).IsOK());
+  status = session_object6.Load(model_name);
+  ASSERT_TRUE(status.IsOK());
+  status = session_object6.Initialize();
+  ASSERT_TRUE(status.IsOK());
+  // run inference
+  // TRT engine will be created and cached
+  // TRT profile will be created and cached only for dynamic input shape
+  // Data in profile,
+  // X: 1, 3, 3, 2, 2, 2
+  // Y: 1, 3, 3, 2, 2, 2
+  // Z: 1, 3, 3, 2, 2, 2
+  RunSession(session_object6, run_options, feeds, output_names, expected_dims_mul_m, expected_values_mul_m);
 }
 
 TEST(TensorrtExecutionProviderTest, TRTPluginsCustomOpTest) {

From ec7c8f3a270c32acb8b8f3d1e42e306026868208 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Sat, 20 Jan 2024 06:56:06 +0000
Subject: [PATCH 24/25] handle relative path for 'ep_cache_context' node attribute

---
 .../providers/tensorrt/tensorrt_execution_provider.cc | 10 ++++++++--
 .../providers/tensorrt/tensorrt_execution_provider.h  |  1 +
 .../test/providers/tensorrt/tensorrt_basic_test.cc    |  1 +
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
index f95009eb8643d..fe6b959b962de 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
@@ -1599,6 +1599,10 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
       LOGS_DEFAULT(ERROR) << "In the case of dumping context model and for security purpose, The trt_engine_cache_path has '..', it's not allowed to point outside the directory.";
     }
 
+    // Engine cache relative path to context model directory.
+    // It's used when dumping the "ep_cache_context" node attribute.
+    engine_cache_relative_path_to_context_model_dir = cache_path_;
+
     // Make cache_path_ to be the relative path of ep_context_file_path_
     cache_path_ = GetPathOrParentPathOfCtxModel(ep_context_file_path_).append(cache_path_).string();
   }
@@ -3018,7 +3022,8 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView
   if (dump_ep_context_model_) {
     // "ep_cache_context" node attribute should be a relative path to context model directory
     if (ep_cache_context_attr_.empty()) {
-      ep_cache_context_attr_ = std::filesystem::relative(engine_cache_path, ep_context_file_path_).string();
+      auto cache_file_name = std::filesystem::path(engine_cache_path).filename();
+      ep_cache_context_attr_ = std::filesystem::path(engine_cache_relative_path_to_context_model_dir).append(cache_file_name.string()).string();
     }
 
     std::unique_ptr<ONNX_NAMESPACE::ModelProto> model_proto{CreateCtxModel(graph_body_viewer,
@@ -3090,7 +3095,8 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView
   if (dump_ep_context_model_ && has_dynamic_shape) {
     // "ep_cache_context" node attribute should be a relative path to context model directory
     if (ep_cache_context_attr_.empty()) {
-      ep_cache_context_attr_ = std::filesystem::relative(engine_cache_path, ep_context_file_path_).string();
+      auto cache_file_name = std::filesystem::path(engine_cache_path).filename();
+      ep_cache_context_attr_ = std::filesystem::path(engine_cache_relative_path_to_context_model_dir).append(cache_file_name.string()).string();
     }
     model_proto_.reset(CreateCtxModel(graph_body_viewer,
                                       ep_cache_context_attr_,
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
index 70b71aa221eef..ad2d2c55c67e1 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
@@ -305,6 +305,7 @@ class TensorrtExecutionProvider : public IExecutionProvider {
   int ep_context_embed_mode_ = 0;
   std::string ctx_model_path_;
   std::string ep_cache_context_attr_;
+  std::string engine_cache_relative_path_to_context_model_dir;
   std::unique_ptr<ONNX_NAMESPACE::ModelProto> model_proto_ = ONNX_NAMESPACE::ModelProto::Create();
 
   std::unordered_set<std::string> control_flow_op_set_ = {"If", "Loop", "Scan"};
diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
index 73e0cf59d198c..ff95d6e2c235c 100644
--- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
+++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
@@ -527,6 +527,7 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) {
    */
   InferenceSession session_object6{so, GetEnvironment()};
   OrtTensorRTProviderOptionsV2 params6;
+  params6.trt_ep_context_embed_mode = 1;
   model_name = params5.trt_ep_context_file_path;
   execution_provider = TensorrtExecutionProviderWithOptions(&params6);
   EXPECT_TRUE(session_object6.RegisterExecutionProvider(std::move(execution_provider)).IsOK());
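Concretely, this patch composes the "ep_cache_context" attribute from the engine cache directory (kept relative to the context model directory) plus the engine file name, which the comment fix in the next patch spells out. A small self-contained sketch of that composition, using the folder names from test cases 2 and 4 (the engine file name is a placeholder, and the final resolution step paraphrases the intent described in the test comments rather than quoting EP code):

```cpp
#include <filesystem>
#include <iostream>
#include <string>

int main() {
  // Folder names taken from test cases 2 and 4; the engine file name is a placeholder.
  std::string context_model_dir = "context_model_folder";         // trt_ep_context_file_path
  std::string cache_dir_relative_to_ctx = "engine_cache_folder";  // trt_engine_cache_path before re-rooting
  std::string engine_cache_path = context_model_dir + "/engine_cache_folder/TRT_engine_cache_xxxxx.engine";

  // What the EP stores in the "ep_cache_context" node attribute after this patch:
  // cache directory relative to the context model directory plus the engine file name.
  auto cache_file_name = std::filesystem::path(engine_cache_path).filename();
  auto ep_cache_context_attr =
      std::filesystem::path(cache_dir_relative_to_ctx).append(cache_file_name.string()).string();
  std::cout << ep_cache_context_attr << "\n";  // engine_cache_folder/TRT_engine_cache_xxxxx.engine (separator is platform-dependent)

  // At load time the attribute is resolved against the context model's directory.
  auto resolved = std::filesystem::path(context_model_dir) / ep_cache_context_attr;
  std::cout << resolved.string() << "\n";      // context_model_folder/engine_cache_folder/TRT_engine_cache_xxxxx.engine
  return 0;
}
```
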
From dccb2da2fab833337da9e192a0738f968e1907bc Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Sun, 21 Jan 2024 00:12:42 +0000
Subject: [PATCH 25/25] update unit test comment

---
 onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
index ff95d6e2c235c..4d2538c947dcc 100644
--- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
+++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
@@ -485,7 +485,7 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) {
    *
    * expected result=>
    * engine cache path is "./context_model_folder/engine_cache_folder/xxxxx.engine"
-   * and the "ep_cache_context" attribute node of the context model should point to that.
+   * and the "ep_cache_context" attribute node of the context model should point to "engine_cache_folder/xxxxx.engine".
    *
    */
   InferenceSession session_object4{so, GetEnvironment()};