From 56e4fda8a84a7a128a052e7ec6d2a6f09cb2a3e6 Mon Sep 17 00:00:00 2001
From: Chi Lo <54722500+chilo-ms@users.noreply.github.com>
Date: Tue, 19 Nov 2024 09:08:54 -0800
Subject: [PATCH] [TensorRT EP] Revert "Add new provider option to exclude nodes from running on TRT" (#22878)

- Revert https://github.com/microsoft/onnxruntime/pull/22681
- But still implicitly exclude DDS ops for TRT 10. A follow-up PR will add
  the trt_op_types_to_exclude provider option properly.
---
 .../tensorrt/tensorrt_provider_options.h      | 36 ++++++-----
 .../tensorrt/tensorrt_execution_provider.cc   | 43 ++++---------
 .../tensorrt/tensorrt_execution_provider.h    |  1 -
 .../tensorrt_execution_provider_info.cc       |  6 --
 .../tensorrt_execution_provider_info.h        |  3 -
 .../tensorrt/tensorrt_provider_factory.cc     |  1 -
 .../core/session/provider_bridge_ort.cc       |  8 +--
 .../python/onnxruntime_pybind_state.cc        |  5 +-
 .../providers/tensorrt/tensorrt_basic_test.cc | 60 -------------------
 9 files changed, 33 insertions(+), 130 deletions(-)

diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h
index 5e5319a34ee9f..ec9be80a63574 100644
--- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h
+++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h
@@ -71,23 +71,21 @@ struct OrtTensorRTProviderOptionsV2 {
  * directory by means of the "trt_onnx_model_folder_path" option.
  *
  */
-  int trt_dump_ep_context_model{0};               // Dump EP context node model
-  const char* trt_ep_context_file_path{nullptr};  // Specify file name to dump EP context node model. Can be a path or a file name or a file name with path.
-  int trt_ep_context_embed_mode{0};               // Specify EP context embed mode. Default 0 = context is engine cache path, 1 = context is engine binary data
-  int trt_weight_stripped_engine_enable{0};       // Enable weight-stripped engine build. Default 0 = false,
-                                                  // nonzero = true
-  const char* trt_onnx_model_folder_path{nullptr};  // Folder path relative to the current working directory for
-                                                    // the ONNX model containing the weights (applicable only when
-                                                    // the "trt_weight_stripped_engine_enable" option is enabled)
-  const void* trt_onnx_bytestream{nullptr};  // The byte stream of th original ONNX model containing the weights
-                                             // (applicable only when the "trt_weight_stripped_engine_enable"
-                                             // option is enabled)
-                                             // can be updated using: UpdateTensorRTProviderOptionsWithValue
-  size_t trt_onnx_bytestream_size{0};  // size of the byte stream provided as "trt_onnx_bytestream"
-                                       // can be updated using: UpdateTensorRTProviderOptionsWithValue
-  const char* trt_engine_cache_prefix{nullptr};  // specify engine cache prefix
-  int trt_engine_hw_compatible{0};               // Enable hardware compatibility. Default 0 = false, nonzero = true
-  const char* trt_op_types_to_exclude{"NonMaxSuppression,NonZero,RoiAlign"};  // Exclude specific ops from running on TRT.
-  // There is a known performance issue with the DDS ops (NonMaxSuppression, NonZero and RoiAlign) from TRT versions 10.0 to 10.7.
-  // TRT EP excludes DDS ops from running on TRT by default, user can override default value with empty string to include all ops.
+  int trt_dump_ep_context_model{0};               // Dump EP context node model
+  const char* trt_ep_context_file_path{nullptr};  // Specify file name to dump EP context node model. Can be a path or a file name or a file name with path.
+  int trt_ep_context_embed_mode{0};               // Specify EP context embed mode. Default 0 = context is engine cache path, 1 = context is engine binary data
+  int trt_weight_stripped_engine_enable{0};       // Enable weight-stripped engine build. Default 0 = false,
+                                                  // nonzero = true
+  const char* trt_onnx_model_folder_path{nullptr};  // Folder path relative to the current working directory for
+                                                    // the ONNX model containing the weights (applicable only when
+                                                    // the "trt_weight_stripped_engine_enable" option is enabled)
+  const void* trt_onnx_bytestream{nullptr};  // The byte stream of the original ONNX model containing the weights
+                                             // (applicable only when the "trt_weight_stripped_engine_enable"
+                                             // option is enabled)
+                                             // can be updated using: UpdateTensorRTProviderOptionsWithValue
+  size_t trt_onnx_bytestream_size{0};  // size of the byte stream provided as "trt_onnx_bytestream"
+                                       // can be updated using: UpdateTensorRTProviderOptionsWithValue
+
+  const char* trt_engine_cache_prefix{nullptr};  // specify engine cache prefix
+  int trt_engine_hw_compatible{0};               // Enable hardware compatibility. Default 0 = false, nonzero = true
 };
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
index 1a5cf6ababdfc..a7330c9bcf13d 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
@@ -1379,8 +1379,6 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
     profile_opt_shapes = info.profile_opt_shapes;
     cuda_graph_enable_ = info.cuda_graph_enable;
     engine_hw_compatible_ = info.engine_hw_compatible;
-    op_types_to_exclude_ = info.op_types_to_exclude;
-
   } else {
     try {
       const std::string max_partition_iterations_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kMaxPartitionIterations);
@@ -1567,11 +1565,6 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
       cuda_graph_enable_ = (std::stoi(cuda_graph_enable_env) == 0 ? false : true);
     }
 
-    const std::string op_types_to_exclude_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kOpTypesToExclude);
-    if (!op_types_to_exclude_env.empty()) {
-      op_types_to_exclude_ = op_types_to_exclude_env;
-    }
-
   } catch (const std::invalid_argument& ex) {
     LOGS_DEFAULT(WARNING) << "[TensorRT EP] Invalid Argument (from environment variables): " << ex.what();
   } catch (const std::out_of_range& ex) {
@@ -1773,8 +1766,7 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
                         << ", trt_ep_context_embed_mode: " << ep_context_embed_mode_
                         << ", trt_cache_prefix: " << cache_prefix_
                         << ", trt_engine_hw_compatible: " << engine_hw_compatible_
-                        << ", trt_onnx_model_bytestream_size_: " << onnx_model_bytestream_size_
-                        << ", trt_op_types_to_exclude: " << op_types_to_exclude_;
+                        << ", trt_onnx_model_bytestream_size_: " << onnx_model_bytestream_size_;
 }
 
 TensorrtExecutionProvider::~TensorrtExecutionProvider() {
@@ -2442,18 +2434,6 @@ bool TensorrtExecutionProvider::DetectTensorRTGraphCycles(SubGraphCollection_t&
   return cycle_detected;
 }
 
-std::set<std::string> GetExcludedNodeSet(std::string node_list_to_exclude) {
-  std::set<std::string> set;
-  if (!node_list_to_exclude.empty()) {
-    std::stringstream node_list(node_list_to_exclude);
-    std::string node;
-    while (std::getline(node_list, node, ',')) {
-      set.insert(node);
-    }
-  }
-  return set;
-}
-
 std::vector<std::unique_ptr<ComputeCapability>>
 TensorrtExecutionProvider::GetCapability(const GraphViewer& graph,
                                          const IKernelLookup& /*kernel_lookup*/) const {
@@ -2486,14 +2466,17 @@ TensorrtExecutionProvider::GetCapability(const GraphViewer& graph,
   std::vector<size_t> nodes_vector(number_of_ort_nodes);
   std::iota(std::begin(nodes_vector), std::end(nodes_vector), 0);
 
-  std::set<std::string> exclude_set = GetExcludedNodeSet(op_types_to_exclude_);
+  std::set<std::string> exclude_ops_set;
 
-  // Print excluded nodes, if any.
-  std::set<std::string>::iterator it;
-  for (it = exclude_set.begin(); it != exclude_set.end(); ++it) {
-    std::string op = *it;
-    LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Exclude \"" << op << "\" from running on TRT, if any.";
-    LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Remove \"" << op << "\" from trt_op_types_to_exclude or specify trt_op_types_to_exclude with empty string to include the op in the input to TRT parser. However, it still depends on TRT parser to determine the eligibility of this op for TRT.";
+  /*
+   * There is a known performance issue with the DDS ops (NonMaxSuppression, NonZero and RoiAlign) in TRT 10.
+   * TRT EP automatically excludes DDS ops from running on TRT.
+   */
+  if (trt_version_ >= 100000 && trt_version_ < 110000) {
+    exclude_ops_set.insert("NonMaxSuppression");
+    exclude_ops_set.insert("NonZero");
+    exclude_ops_set.insert("RoiAlign");
+    LOGS_DEFAULT(VERBOSE) << "There is a known performance issue with the DDS ops (NonMaxSuppression, NonZero and RoiAlign) in TRT 10. TRT EP automatically excludes DDS ops from running on TRT, if applicable";
   }
 
   SubGraphCollection_t parser_nodes_vector, supported_nodes_vector;
@@ -2502,7 +2485,7 @@ TensorrtExecutionProvider::GetCapability(const GraphViewer& graph,
 
   /* Iterate all the nodes and exclude the node if:
    * 1. It's a control flow op and its subgraph(s) is not fully TRT eligible.
-   * 2. It's in the exlucded set which specified by trt_op_types_to_exclude.
+   * 2. It's a DDS op.
   */
  for (const auto& index : nodes_vector) {
    const auto& node = graph.GetNode(node_index[index]);
@@ -2538,7 +2521,7 @@ TensorrtExecutionProvider::GetCapability(const GraphViewer& graph,
     }
 
     // Exclude any ops, if applicable
-    if (exclude_set.find(node->OpType()) != exclude_set.end()) {
+    if (exclude_ops_set.find(node->OpType()) != exclude_ops_set.end()) {
       supported_node = false;
     }
 
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
index 9d8af02ba10e6..9e3a03417d917 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
@@ -57,7 +57,6 @@ static const std::string kDumpEpContextModel = "ORT_DUMP_EP_CONTEXT_MODEL";
 static const std::string kEpContextEmbedMode = "ORT_EP_CONTEXT_EMBED_MODE";
 static const std::string kEpContextComputeCapabilityEnable = "ORT_EP_CONTEXT_COMPUTE_CAPABILITY_ENABLE";
 static const std::string kEngineCachePrefix = "ORT_TENSORRT_CACHE_PREFIX";
-static const std::string kOpTypesToExclude = "ORT_TENSORRT_OP_TYPES_TO_EXCLUDE";
 // Old env variable for backward compatibility
 static const std::string kEngineCachePath = "ORT_TENSORRT_ENGINE_CACHE_PATH";
 }  // namespace tensorrt_env_vars
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc
index bc0d00ec6791f..63b6d35072290 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc
@@ -56,7 +56,6 @@ constexpr const char* kDumpEpContextModel = "trt_dump_ep_context_model";
 constexpr const char* kEngineHwCompatible = "trt_engine_hw_compatible";
 constexpr const char* kONNXBytestream = "trt_onnx_bytestream";
 constexpr const char* kONNXBytestreamSize = "trt_onnx_bytestream_size";
-constexpr const char* kOpTypesToExclude = "trt_op_types_to_exclude";
 }  // namespace provider_option_names
 }  // namespace tensorrt
 
@@ -135,7 +134,6 @@ TensorrtExecutionProviderInfo TensorrtExecutionProviderInfo::FromProviderOptions
             return Status::OK();
           })
       .AddAssignmentToReference(tensorrt::provider_option_names::kONNXBytestreamSize, info.onnx_bytestream_size)
-      .AddAssignmentToReference(tensorrt::provider_option_names::kOpTypesToExclude, info.op_types_to_exclude)
       .Parse(options));  // add new provider option here.
 
   info.user_compute_stream = user_compute_stream;
@@ -190,7 +188,6 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const TensorrtE
       {tensorrt::provider_option_names::kEngineHwCompatible, MakeStringWithClassicLocale(info.engine_hw_compatible)},
       {tensorrt::provider_option_names::kONNXBytestream, MakeStringWithClassicLocale(info.onnx_bytestream)},
       {tensorrt::provider_option_names::kONNXBytestreamSize, MakeStringWithClassicLocale(info.onnx_bytestream_size)},
-      {tensorrt::provider_option_names::kOpTypesToExclude, MakeStringWithClassicLocale(info.op_types_to_exclude)},
   };
   return options;
 }
@@ -209,7 +206,6 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const OrtTensor
   const std::string kProfilesOptShapes_ = empty_if_null(info.trt_profile_opt_shapes);
   const std::string kEpContextFilePath_ = empty_if_null(info.trt_ep_context_file_path);
   const std::string kOnnxModelFolderPath_ = empty_if_null(info.trt_onnx_model_folder_path);
-  const std::string kOpTypesToExclude_ = empty_if_null(info.trt_op_types_to_exclude);
 
   const ProviderOptions options{
       {tensorrt::provider_option_names::kDeviceId, MakeStringWithClassicLocale(info.device_id)},
@@ -255,7 +251,6 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const OrtTensor
       {tensorrt::provider_option_names::kEngineHwCompatible, MakeStringWithClassicLocale(info.trt_engine_hw_compatible)},
      {tensorrt::provider_option_names::kONNXBytestream, MakeStringWithClassicLocale(reinterpret_cast<size_t>(info.trt_onnx_bytestream))},
       {tensorrt::provider_option_names::kONNXBytestreamSize, MakeStringWithClassicLocale(info.trt_onnx_bytestream_size)},
-      {tensorrt::provider_option_names::kOpTypesToExclude, kOpTypesToExclude_},
   };
   return options;
 }
@@ -360,6 +355,5 @@ void TensorrtExecutionProviderInfo::UpdateProviderOptions(void* provider_options
   trt_provider_options_v2.trt_engine_hw_compatible = internal_options.engine_hw_compatible;
   trt_provider_options_v2.trt_onnx_bytestream = internal_options.onnx_bytestream;
   trt_provider_options_v2.trt_onnx_bytestream_size = internal_options.onnx_bytestream_size;
-  trt_provider_options_v2.trt_op_types_to_exclude = copy_string_if_needed(internal_options.op_types_to_exclude);
 }
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h
index 767f320d760a8..fa1bbd6d3d7e6 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h
@@ -60,9 +60,6 @@ struct TensorrtExecutionProviderInfo {
   int ep_context_embed_mode{0};
   std::string engine_cache_prefix{""};
   bool engine_hw_compatible{false};
-  // There is a known performance issue with the DDS ops (NonMaxSuppression, NonZero and RoiAlign) from TRT versions 10.0 to 10.7.
-  // TRT EP excludes DDS ops from running on TRT by default, user can override default value of trt_op_types_to_exclude with empty string to include all ops.
-  std::string op_types_to_exclude{"NonMaxSuppression,NonZero,RoiAlign"};
 
   static TensorrtExecutionProviderInfo FromProviderOptions(const ProviderOptions& options);
   static ProviderOptions ToProviderOptions(const TensorrtExecutionProviderInfo& info);
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc
index e4521ddd18ade..e242788ff389a 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc
@@ -118,7 +118,6 @@ struct Tensorrt_Provider : Provider {
     info.engine_hw_compatible = options.trt_engine_hw_compatible != 0;
     info.onnx_bytestream = options.trt_onnx_bytestream;
     info.onnx_bytestream_size = options.trt_onnx_bytestream_size;
-    info.op_types_to_exclude = options.trt_op_types_to_exclude == nullptr ? "" : options.trt_op_types_to_exclude;
 
     return std::make_shared<TensorrtProviderFactory>(info);
   }
diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc
index 0aa93bce354e8..ecbdd31160c7a 100644
--- a/onnxruntime/core/session/provider_bridge_ort.cc
+++ b/onnxruntime/core/session/provider_bridge_ort.cc
@@ -2294,11 +2294,8 @@ ORT_API_STATUS_IMPL(OrtApis::UpdateTensorRTProviderOptions,
 #ifdef USE_TENSORRT
   onnxruntime::ProviderOptions provider_options_map;
   for (size_t i = 0; i != num_keys; ++i) {
-    // Don't allow key and value to be empty except the value of trt_op_types_to_exclude
-    if (provider_options_keys[i] == nullptr ||
-        provider_options_keys[i][0] == '\0' ||
-        (provider_options_values[i] == nullptr && strcmp("trt_op_types_to_exclude", provider_options_keys[i])) ||
-        (provider_options_values[i][0] == '\0' && strcmp("trt_op_types_to_exclude", provider_options_keys[i]))) {
+    if (provider_options_keys[i] == nullptr || provider_options_keys[i][0] == '\0' ||
+        provider_options_values[i] == nullptr || provider_options_values[i][0] == '\0') {
       return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "key/value cannot be empty");
     }
 
@@ -2413,7 +2410,6 @@ ORT_API(void, OrtApis::ReleaseTensorRTProviderOptions, _Frees_ptr_opt_ OrtTensor
     delete[] ptr->trt_profile_opt_shapes;
     delete[] ptr->trt_ep_context_file_path;
     delete[] ptr->trt_onnx_model_folder_path;
-    if (!ptr->trt_op_types_to_exclude) delete[] ptr->trt_op_types_to_exclude;
   }
 
   std::unique_ptr<OrtTensorRTProviderOptionsV2> p(ptr);
diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc
index 05ca3c6c15793..4d9583be0ef0f 100644
--- a/onnxruntime/python/onnxruntime_pybind_state.cc
+++ b/onnxruntime/python/onnxruntime_pybind_state.cc
@@ -526,7 +526,7 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
     // and TRT EP instance, so it won't be released.)
     std::string calibration_table, cache_path, cache_prefix, timing_cache_path, lib_path, trt_tactic_sources,
         trt_extra_plugin_lib_paths, min_profile, max_profile, opt_profile, ep_context_file_path,
-        onnx_model_folder_path, trt_op_types_to_exclude{"NonMaxSuppression,NonZero,RoiAlign"};
+        onnx_model_folder_path;
     auto it = provider_options_map.find(type);
     if (it != provider_options_map.end()) {
       OrtTensorRTProviderOptionsV2 params;
@@ -824,9 +824,6 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
         } else {
          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_hw_compatible' should be 'True' or 'False'. Default value is 'False'.\n");
         }
-      } else if (option.first == "trt_op_types_to_exclude") {
-        trt_op_types_to_exclude = option.second;
-        params.trt_op_types_to_exclude = trt_op_types_to_exclude.c_str();
       } else {
         ORT_THROW("Invalid TensorRT EP option: ", option.first);
       }
diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
index b4199548ae515..63327a028c6f4 100644
--- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
+++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
@@ -612,66 +612,6 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) {
   RunSession(session_object9, run_options, feeds, output_names, expected_dims_mul_m, expected_values_mul_m);
 }
 
-TEST(TensorrtExecutionProviderTest, ExcludeOpsTest) {
-  /* The mnist.onnx looks like this:
-   *      Conv
-   *       |
-   *      Add
-   *       .
-   *       .
-   *       |
-   *    MaxPool
-   *       |
-   *       .
-   *       .
-   *    MaxPool
-   *       |
-   *    Reshape
-   *       |
-   *    MatMul
-   *       .
-   *       .
-   *
-   */
-  PathString model_name = ORT_TSTR("testdata/mnist.onnx");
-  SessionOptions so;
-  so.session_logid = "TensorrtExecutionProviderExcludeOpsTest";
-  RunOptions run_options;
-  run_options.run_tag = so.session_logid;
-  InferenceSession session_object{so, GetEnvironment()};
-  auto cuda_provider = DefaultCudaExecutionProvider();
-  auto cpu_allocator = cuda_provider->CreatePreferredAllocators()[1];
-  std::vector<int64_t> dims_op_x = {1, 1, 28, 28};
-  std::vector<float> values_op_x(784, 1.0f);  // 784=1*1*28*28
-  OrtValue ml_value_x;
-  CreateMLValue<float>(cpu_allocator, dims_op_x, values_op_x, &ml_value_x);
-  NameMLValMap feeds;
-  feeds.insert(std::make_pair("Input3", ml_value_x));
-
-  // prepare outputs
-  std::vector<std::string> output_names;
-  output_names.push_back("Plus214_Output_0");
-  std::vector<OrtValue> fetches;
-
-  RemoveCachesByType("./", ".engine");
-  OrtTensorRTProviderOptionsV2 params;
-  params.trt_engine_cache_enable = 1;
-  params.trt_op_types_to_exclude = "MaxPool";
-  std::unique_ptr<IExecutionProvider> execution_provider = TensorrtExecutionProviderWithOptions(&params);
-  EXPECT_TRUE(session_object.RegisterExecutionProvider(std::move(execution_provider)).IsOK());
-  auto status = session_object.Load(model_name);
-  ASSERT_TRUE(status.IsOK());
-  status = session_object.Initialize();
-  ASSERT_TRUE(status.IsOK());
-  status = session_object.Run(run_options, feeds, output_names, &fetches);
-  ASSERT_TRUE(status.IsOK());
-
-  std::vector<fs::path> engine_files;
-  engine_files = GetCachesByType("./", ".engine");
-  // The whole graph should be partitioned into 3 TRT subgraphs and 2 cpu nodes
-  ASSERT_EQ(engine_files.size(), 3);
-}
-
 TEST(TensorrtExecutionProviderTest, TRTPluginsCustomOpTest) {
   PathString model_name = ORT_TSTR("testdata/trt_plugin_custom_op_test.onnx");
   SessionOptions so;