From b25517fbf2cf30c1d6180fced79c9c79135fb5ee Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Sun, 17 Sep 2023 19:12:00 +0000
Subject: [PATCH] Add member initialization for OrtTensorRTProviderOptionsV2

---
 .../tensorrt/tensorrt_provider_options.h     | 68 +++++++++----------
 .../core/session/provider_bridge_ort.cc      | 28 --------
 2 files changed, 34 insertions(+), 62 deletions(-)

diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h
index e7d0f9f03ade9..8f2b5af870506 100644
--- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h
+++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h
@@ -11,38 +11,38 @@
 /// User can only get the instance of OrtTensorRTProviderOptionsV2 via CreateTensorRTProviderOptions.
 ///
 struct OrtTensorRTProviderOptionsV2 {
-  int device_id;                                // cuda device id.
-  int has_user_compute_stream;                  // indicator of user specified CUDA compute stream.
-  void* user_compute_stream;                    // user specified CUDA compute stream.
-  int trt_max_partition_iterations;             // maximum iterations for TensorRT parser to get capability
-  int trt_min_subgraph_size;                    // minimum size of TensorRT subgraphs
-  size_t trt_max_workspace_size;                // maximum workspace size for TensorRT.
-  int trt_fp16_enable;                          // enable TensorRT FP16 precision. Default 0 = false, nonzero = true
-  int trt_int8_enable;                          // enable TensorRT INT8 precision. Default 0 = false, nonzero = true
-  const char* trt_int8_calibration_table_name;  // TensorRT INT8 calibration table name.
-  int trt_int8_use_native_calibration_table;    // use native TensorRT generated calibration table. Default 0 = false, nonzero = true
-  int trt_dla_enable;                           // enable DLA. Default 0 = false, nonzero = true
-  int trt_dla_core;                             // DLA core number. Default 0
-  int trt_dump_subgraphs;                       // dump TRT subgraph. Default 0 = false, nonzero = true
-  int trt_engine_cache_enable;                  // enable engine caching. Default 0 = false, nonzero = true
-  const char* trt_engine_cache_path;            // specify engine cache path
-  int trt_engine_decryption_enable;             // enable engine decryption. Default 0 = false, nonzero = true
-  const char* trt_engine_decryption_lib_path;   // specify engine decryption library path
-  int trt_force_sequential_engine_build;        // force building TensorRT engine sequentially. Default 0 = false, nonzero = true
-  int trt_context_memory_sharing_enable;        // enable context memory sharing between subgraphs. Default 0 = false, nonzero = true
-  int trt_layer_norm_fp32_fallback;             // force Pow + Reduce ops in layer norm to FP32. Default 0 = false, nonzero = true
-  int trt_timing_cache_enable;                  // enable TensorRT timing cache. Default 0 = false, nonzero = true
-  int trt_force_timing_cache;                   // force the TensorRT cache to be used even if device profile does not match. Default 0 = false, nonzero = true
-  int trt_detailed_build_log;                   // Enable detailed build step logging on TensorRT EP with timing for each engine build. Default 0 = false, nonzero = true
-  int trt_build_heuristics_enable;              // Build engine using heuristics to reduce build time. Default 0 = false, nonzero = true
-  int trt_sparsity_enable;                      // Control if sparsity can be used by TRT. Default 0 = false, 1 = true
-  int trt_builder_optimization_level;           // Set the builder optimization level. WARNING: levels below 3 do not guarantee good engine performance, but greatly improve build time. Default 3, valid range [0-5]
-  int trt_auxiliary_streams;                    // Set maximum number of auxiliary streams per inference stream. Setting this value to 0 will lead to optimal memory usage. Default -1 = heuristics
-  const char* trt_tactic_sources;               // Specify the tactics to be used by adding (+) or removing (-) tactics from the default
-                                                // tactic sources (default = all available tactics) e.g. "-CUDNN,+CUBLAS" available keys: "CUBLAS"|"CUBLAS_LT"|"CUDNN"|"EDGE_MASK_CONVOLUTIONS"
-  const char* trt_extra_plugin_lib_paths;       // specify extra TensorRT plugin library paths
-  const char* trt_profile_min_shapes;           // Specify the range of the input shapes to build the engine with
-  const char* trt_profile_max_shapes;           // Specify the range of the input shapes to build the engine with
-  const char* trt_profile_opt_shapes;           // Specify the range of the input shapes to build the engine with
-  int trt_cuda_graph_enable;                    // Enable CUDA graph in ORT TRT
+  int device_id{0};                                      // cuda device id.
+  int has_user_compute_stream{0};                        // indicator of user specified CUDA compute stream.
+  void* user_compute_stream{nullptr};                    // user specified CUDA compute stream.
+  int trt_max_partition_iterations{1000};                // maximum iterations for TensorRT parser to get capability
+  int trt_min_subgraph_size{1};                          // minimum size of TensorRT subgraphs
+  size_t trt_max_workspace_size{1 << 30};                // maximum workspace size for TensorRT.
+  int trt_fp16_enable{0};                                // enable TensorRT FP16 precision. Default 0 = false, nonzero = true
+  int trt_int8_enable{0};                                // enable TensorRT INT8 precision. Default 0 = false, nonzero = true
+  const char* trt_int8_calibration_table_name{nullptr};  // TensorRT INT8 calibration table name.
+  int trt_int8_use_native_calibration_table{0};          // use native TensorRT generated calibration table. Default 0 = false, nonzero = true
+  int trt_dla_enable{0};                                 // enable DLA. Default 0 = false, nonzero = true
+  int trt_dla_core{0};                                   // DLA core number. Default 0
+  int trt_dump_subgraphs{0};                             // dump TRT subgraph. Default 0 = false, nonzero = true
+  int trt_engine_cache_enable{0};                        // enable engine caching. Default 0 = false, nonzero = true
+  const char* trt_engine_cache_path{nullptr};            // specify engine cache path
+  int trt_engine_decryption_enable{0};                   // enable engine decryption. Default 0 = false, nonzero = true
+  const char* trt_engine_decryption_lib_path{nullptr};   // specify engine decryption library path
+  int trt_force_sequential_engine_build{0};              // force building TensorRT engine sequentially. Default 0 = false, nonzero = true
+  int trt_context_memory_sharing_enable{0};              // enable context memory sharing between subgraphs. Default 0 = false, nonzero = true
+  int trt_layer_norm_fp32_fallback{0};                   // force Pow + Reduce ops in layer norm to FP32. Default 0 = false, nonzero = true
+  int trt_timing_cache_enable{0};                        // enable TensorRT timing cache. Default 0 = false, nonzero = true
+  int trt_force_timing_cache{0};                         // force the TensorRT cache to be used even if device profile does not match. Default 0 = false, nonzero = true
+  int trt_detailed_build_log{0};                         // Enable detailed build step logging on TensorRT EP with timing for each engine build. Default 0 = false, nonzero = true
+  int trt_build_heuristics_enable{0};                    // Build engine using heuristics to reduce build time. Default 0 = false, nonzero = true
+  int trt_sparsity_enable{0};                            // Control if sparsity can be used by TRT. Default 0 = false, 1 = true
+  int trt_builder_optimization_level{3};                 // Set the builder optimization level. WARNING: levels below 3 do not guarantee good engine performance, but greatly improve build time. Default 3, valid range [0-5]
+  int trt_auxiliary_streams{-1};                         // Set maximum number of auxiliary streams per inference stream. Setting this value to 0 will lead to optimal memory usage. Default -1 = heuristics
+  const char* trt_tactic_sources{nullptr};               // Specify the tactics to be used by adding (+) or removing (-) tactics from the default
+                                                         // tactic sources (default = all available tactics) e.g. "-CUDNN,+CUBLAS" available keys: "CUBLAS"|"CUBLAS_LT"|"CUDNN"|"EDGE_MASK_CONVOLUTIONS"
+  const char* trt_extra_plugin_lib_paths{nullptr};       // specify extra TensorRT plugin library paths
+  const char* trt_profile_min_shapes{nullptr};           // Specify the range of the input shapes to build the engine with
+  const char* trt_profile_max_shapes{nullptr};           // Specify the range of the input shapes to build the engine with
+  const char* trt_profile_opt_shapes{nullptr};           // Specify the range of the input shapes to build the engine with
+  int trt_cuda_graph_enable{0};                          // Enable CUDA graph in ORT TRT
 };
diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc
index 8f0a5aeaa3975..5b35767a3d472 100644
--- a/onnxruntime/core/session/provider_bridge_ort.cc
+++ b/onnxruntime/core/session/provider_bridge_ort.cc
@@ -1784,34 +1784,6 @@ ORT_API_STATUS_IMPL(OrtApis::CreateTensorRTProviderOptions, _Outptr_ OrtTensorRT
   API_IMPL_BEGIN
 #ifdef USE_TENSORRT
   auto options = std::make_unique<OrtTensorRTProviderOptionsV2>();
-  options->device_id = 0;
-  options->has_user_compute_stream = 0;
-  options->user_compute_stream = nullptr;
-  options->trt_max_partition_iterations = 1000;
-  options->trt_min_subgraph_size = 1;
-  options->trt_max_workspace_size = 1 << 30;
-  options->trt_fp16_enable = false;
-  options->trt_int8_enable = false;
-  options->trt_int8_calibration_table_name = nullptr;
-  options->trt_int8_use_native_calibration_table = false;
-  options->trt_dla_enable = false;
-  options->trt_dla_core = false;
-  options->trt_dump_subgraphs = false;
-  options->trt_engine_cache_enable = false;
-  options->trt_engine_cache_path = nullptr;
-  options->trt_engine_decryption_enable = false;
-  options->trt_engine_decryption_lib_path = nullptr;
-  options->trt_force_sequential_engine_build = false;
-  options->trt_context_memory_sharing_enable = false;
-  options->trt_layer_norm_fp32_fallback = false;
-  options->trt_timing_cache_enable = false;
-  options->trt_force_timing_cache = false;
-  options->trt_detailed_build_log = false;
-  options->trt_extra_plugin_lib_paths = nullptr;
-  options->trt_profile_min_shapes = nullptr;
-  options->trt_profile_max_shapes = nullptr;
-  options->trt_profile_opt_shapes = nullptr;
-  options->trt_cuda_graph_enable = false;
   *out = options.release();
   return nullptr;
 #else
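
Reviewer note (not part of the patch): with in-class member initializers, every
construction of OrtTensorRTProviderOptionsV2 starts from the documented defaults,
not only the instance built inside CreateTensorRTProviderOptions. This also fixes
a drift bug visible in the removed hunk: the hand-written assignment list never set
trt_build_heuristics_enable, trt_sparsity_enable, trt_builder_optimization_level,
trt_auxiliary_streams, or trt_tactic_sources. A minimal sketch of how the new
defaults can be sanity-checked; the file name is hypothetical and it assumes the
repo's include/onnxruntime directory is on the include path:

// sanity_check_defaults.cc (illustrative)
#include <cassert>

#include "core/providers/tensorrt/tensorrt_provider_options.h"

int main() {
  OrtTensorRTProviderOptionsV2 options;  // member initializers run here

  // Defaults the factory used to assign by hand:
  assert(options.device_id == 0);
  assert(options.user_compute_stream == nullptr);
  assert(options.trt_max_partition_iterations == 1000);
  assert(options.trt_min_subgraph_size == 1);
  assert(options.trt_max_workspace_size == (1ULL << 30));
  assert(options.trt_engine_cache_path == nullptr);

  // Defaults the factory never assigned; now guaranteed by the struct itself:
  assert(options.trt_builder_optimization_level == 3);
  assert(options.trt_auxiliary_streams == -1);
  assert(options.trt_sparsity_enable == 0);
  assert(options.trt_tactic_sources == nullptr);
  return 0;
}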
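
Callers on the public C API are unaffected, since the factory still returns a fully
initialized object. A sketch of the unchanged creation flow, using existing C API
entry points (error handling trimmed for brevity):

// c_api_usage.cc (illustrative)
#include "onnxruntime_c_api.h"

int main() {
  const OrtApi* api = OrtGetApiBase()->GetApi(ORT_API_VERSION);

  // Defaults now come from the struct's member initializers rather than
  // per-field assignments inside the factory.
  OrtTensorRTProviderOptionsV2* options = nullptr;
  OrtStatus* status = api->CreateTensorRTProviderOptions(&options);
  if (status != nullptr) {
    api->ReleaseStatus(status);
    return 1;
  }

  // ... pass to SessionOptionsAppendExecutionProvider_TensorRT_V2, etc. ...

  api->ReleaseTensorRTProviderOptions(options);
  return 0;
}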