From 2952cf82a52ade99fee9ee9dcfd3570dd4e51863 Mon Sep 17 00:00:00 2001
From: RandySheriffH <48490400+RandySheriffH@users.noreply.github.com>
Date: Fri, 15 Dec 2023 14:57:55 -0800
Subject: [PATCH 1/5] Access map by iterator to silence sanity check. (#18835)

Use an iterator to refer to the set.
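For background: `std::map::operator[]` default-constructs and inserts an entry for a missing key, so even a read-looking access mutates the map, which is the kind of side effect a sanity check can flag; `find()` is a pure lookup. A minimal, self-contained repro of the distinction (the harness below is invented for illustration; only the `operator[]`-vs-`find()` point mirrors the patch):

```cpp
#include <iostream>
#include <map>
#include <set>

int main() {
  std::map<int, std::set<int>> dependence_graph{{0, {1}}};

  // dependence_graph[42] would default-construct and insert an empty set,
  // silently growing the map during what looks like a read.
  // find() is a side-effect-free lookup instead:
  auto it = dependence_graph.find(42);
  std::cout << (it == dependence_graph.end() ? "absent" : "present") << "\n";  // absent
  std::cout << "size = " << dependence_graph.size() << "\n";                   // still 1
}
```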
Co-authored-by: Randy Shuai
---
 onnxruntime/core/framework/allocation_planner.cc | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/onnxruntime/core/framework/allocation_planner.cc b/onnxruntime/core/framework/allocation_planner.cc
index 9556e056dedc0..ea7a6432a7507 100644
--- a/onnxruntime/core/framework/allocation_planner.cc
+++ b/onnxruntime/core/framework/allocation_planner.cc
@@ -1035,8 +1035,11 @@ class PlannerImpl {
     std::function<void(NodeIndex)> dfs = [&](NodeIndex curr) {
       if (dependents.find(curr) == dependents.end()) {
         dependents.insert(curr);
-        for (NodeIndex dep : dependence_graph_[curr]) {
-          dfs(dep);
+        auto dep_graph_iter = dependence_graph_.find(curr);
+        if (dep_graph_iter != dependence_graph_.end()) {
+          for (NodeIndex dep : dep_graph_iter->second) {
+            dfs(dep);
+          }
         }
       }
     };

From 50cbcf95877b60795f32c4538611f9a119bb0291 Mon Sep 17 00:00:00 2001
From: Dmitri Smirnov
Date: Fri, 15 Dec 2023 15:56:20 -0800
Subject: [PATCH 2/5] Build function bodies according to the imported global opset. (#18833)

### Description
Build function bodies according to the imported global opset. The same applies to querying ONNX functions.

### Motivation and Context
This addresses issues:
https://github.com/microsoft/onnxruntime/issues/18781
https://github.com/microsoft/onnxruntime/issues/16438
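The rule the diff below implements can be reduced to: prefer the opset version the model imports for the ONNX domain, and fall back to the operator schema's `SinceVersion()` only when no import is found. A minimal sketch under that reading (the helper name and harness are invented; only the fallback logic mirrors the change):

```cpp
#include <iostream>
#include <optional>
#include <string>
#include <unordered_map>

constexpr const char* kOnnxDomain = "";  // ONNX uses the empty string for its own domain.

// Returns the opset version the model imported for the ONNX domain, if any.
std::optional<int> GetImportedOnnxOpset(const std::unordered_map<std::string, int>& domain_to_version) {
  auto it = domain_to_version.find(kOnnxDomain);
  if (it != domain_to_version.cend()) return it->second;
  return std::nullopt;
}

int main() {
  std::unordered_map<std::string, int> opset_imports{{kOnnxDomain, 18}};
  int since_version = 13;  // version at which the op's schema was introduced
  // The patch's rule: prefer the model's imported opset, else fall back to SinceVersion().
  int requested = GetImportedOnnxOpset(opset_imports).value_or(since_version);
  std::cout << "build function body for opset " << requested << "\n";  // prints 18
}
```

So a model importing opset 18 gets an opset-18 function body instead of one pinned to the op's introduction version.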
---
 onnxruntime/core/graph/graph.cc             | 35 ++++++++-----
 onnxruntime/test/framework/function_test.cc | 54 +++++++++++++++++++++
 2 files changed, 77 insertions(+), 12 deletions(-)

diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc
index d489a59c4b798..baebe2420073b 100644
--- a/onnxruntime/core/graph/graph.cc
+++ b/onnxruntime/core/graph/graph.cc
@@ -582,6 +582,17 @@ bool Node::TryGetFunctionProto(ONNX_NAMESPACE::FunctionProto& onnx_function_prot
     onnx_function_proto = *func_template_->onnx_func_proto_;
     return true;
   } else if (op_) {
+    auto get_opset_version = [op = op_](Graph* graph) -> std::optional<int> {
+      if (op->domain() == kOnnxDomain) {
+        const auto& domain_to_version = graph->DomainToVersionMap();
+        const auto iter = domain_to_version.find(kOnnxDomain);
+        if (iter != domain_to_version.cend()) {
+          return iter->second;
+        }
+      }
+      return {};
+    };
+
     // Check if this node has a schema defined function proto.
     if (op_->HasContextDependentFunction()) {
       NodeProto node_proto;
@@ -595,8 +606,13 @@ bool Node::TryGetFunctionProto(ONNX_NAMESPACE::FunctionProto& onnx_function_prot
         } else
           input_types.emplace_back();
       }
+
+      auto requested_opset_version = get_opset_version(graph_);
+      if (!requested_opset_version.has_value()) {
+        requested_opset_version = SinceVersion();
+      }
       ONNX_NAMESPACE::FunctionBodyBuildContextImpl function_body_ctx(node_proto, input_types);
-      return op_->BuildContextDependentFunction(function_body_ctx, onnx_function_proto);
+      return op_->BuildContextDependentFunction(function_body_ctx, onnx_function_proto, *requested_opset_version);
     } else if (op_->HasFunction()) {
       const FunctionProto* function_ptr = nullptr;
       // We need to get a function-body suitable for the ONNX opset used by the model.
@@ -605,17 +621,12 @@ bool Node::TryGetFunctionProto(ONNX_NAMESPACE::FunctionProto& onnx_function_prot
       // as the default-version, which is incorrect in the case of functions belonging to
      // non-onnx domains, like MSDOMAIN.

-      // We use the following as a temporary hack.
-      function_ptr = op_->GetFunction(SinceVersion(), false);
-
-      // TODO: Switch to following, once ONNX issue is fixed.
-      // auto& map = graph_->DomainToVersionMap();
-      // const auto iter = map.find(kOnnxDomain);
-      // if (iter != map.end()) {
-      //   function_ptr = op_->GetFunction(iter->second, true);
-      // } else {
-      //   function_ptr = op_->GetFunction();
-      // }
+      auto requested_opset_version = get_opset_version(graph_);
+      if (requested_opset_version.has_value()) {
+        function_ptr = op_->GetFunction(*requested_opset_version, true);
+      } else {
+        function_ptr = op_->GetFunction(SinceVersion(), false);
+      }

       if (function_ptr != nullptr) {
         onnx_function_proto = *function_ptr;
diff --git a/onnxruntime/test/framework/function_test.cc b/onnxruntime/test/framework/function_test.cc
index 9ab78cac3aca4..fa3545ef27d72 100644
--- a/onnxruntime/test/framework/function_test.cc
+++ b/onnxruntime/test/framework/function_test.cc
@@ -614,5 +614,59 @@ TEST(FunctionTest, TestInlinedFunctionDoesNotReserrectNonExistingArgs) {
                     AsSpan(output_names), &fetches, 0));
 }

+///
+/// This test covers the issues:
+/// https://github.com/microsoft/onnxruntime/issues/16438
+/// https://github.com/microsoft/onnxruntime/issues/18781
+///
+TEST(FunctionTest, Test_GH_issue_16438) {
+  const char* code = R"(
+    <
+    ir_version: 8,
+    opset_import: ["pkg.onnxscript.torch_lib" : 1, "" : 18],
+    producer_name: "pytorch",
+    producer_version: "2.1.0"
+    >
+    torch_jit (float16[5,10,5] input_0) => (double[5,10,5] _val_1) {
+      _val_1 = pkg.onnxscript.torch_lib.aten_special_log_softmax (input_0)
+    }
+    <
+      domain: "pkg.onnxscript.torch_lib",
+      opset_import: ["" : 18]
+    >
+    aten_special_log_softmax (self) => (result_8)
+    {
+      tmp = Shape(self)
+      tmp_0 = Size(tmp)
+      int64_0 = Constant ()
+      int64_0_cast = CastLike(int64_0, tmp_0)
+      self_is_scalar = Equal(tmp_0, int64_0_cast)
+      self_4 = If(self_is_scalar) <then_branch : graph = thenGraph_8() => (self_2) {
+        tmp_1 = Constant ()
+        self_2 = Unsqueeze(self, tmp_1)
+      }, else_branch : graph = elseGraph_8() => (self_3) {
+        self_3 = Identity(self)
+      }>
+      result = LogSoftmax(self_4)
+      result_5 = Cast(result)
+      result_8 = If(self_is_scalar) <then_branch : graph = thenGraph_12() => (result_6) {
+        result_6 = Squeeze(result_5)
+      }, else_branch : graph = elseGraph_12() => (result_7) {
+        result_7 = Identity(result_5)
+      }>
+    }
+  )";
+
+  std::string serialized_model;
+  ParseOnnxSource(code, serialized_model);
+  SessionOptions session_options;
+  InferenceSession session_object{session_options, GetEnvironment()};
+
+  std::stringstream sstr(serialized_model);
+  auto status = session_object.Load(sstr);
+  ASSERT_TRUE(status.IsOK()) << status.ErrorMessage();
+  status = session_object.Initialize();
+  ASSERT_TRUE(status.IsOK()) << status.ErrorMessage();
+}
 }  // namespace test
 }  // namespace onnxruntime

From ad476d5a1fb63a4cad74899873ccbf61e9487a23 Mon Sep 17 00:00:00 2001
From: Changming Sun
Date: Fri, 15 Dec 2023 17:44:02 -0800
Subject: [PATCH 3/5] Change Nuget packaging pipeline's build TRT job to
 download CUDA SDK on-the-fly (#18847)

### Description
Change the Nuget packaging pipeline's build-TRT job to download the CUDA SDK on the fly, so that we do not need to bake a CUDA SDK into the build machine's image.

---
 .../azure-pipelines/c-api-noopenmp-packaging-pipelines.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml
index fcf15778c7902..50ca6908520a9 100644
--- a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml
+++ b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml
@@ -242,6 +242,7 @@ stages:
     runTests: ${{ parameters.RunOnnxRuntimeTests }}
     buildJava: true
     java_artifact_id: onnxruntime_gpu
+    CudaVersion: 11.8

 # CUDA with Tensorrt
 - template: templates/win-ci.yml
@@ -253,10 +254,11 @@ stages:
     buildArch: x64
     msbuildPlatform: x64
     packageName: x64-tensorrt
-    buildparameter: --use_tensorrt --tensorrt_home="$(Agent.TempDirectory)\TensorRT-8.6.1.6.Windows10.x86_64.cuda-11.8" --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8" --enable_onnx_tests --enable_wcos --build_java --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80"
+    buildparameter: --use_tensorrt --tensorrt_home="$(Agent.TempDirectory)\TensorRT-8.6.1.6.Windows10.x86_64.cuda-11.8" --cuda_home="$(Agent.TempDirectory)\v11.8" --enable_onnx_tests --enable_wcos --build_java --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80"
     runTests: ${{ parameters.RunOnnxRuntimeTests }}
     buildJava: true
     java_artifact_id: onnxruntime_gpu
+    CudaVersion: 11.8
     UseIncreasedTimeoutForTests: ${{ parameters.UseIncreasedTimeoutForTests }}

 # ROCm

From 9426bd50cb52cd0715e5f917cc70bff3190ef4c1 Mon Sep 17 00:00:00 2001
From: Yifan Li <109183385+yf711@users.noreply.github.com>
Date: Mon, 18 Dec 2023 09:16:09 -0800
Subject: [PATCH 4/5] [TensorRT EP] Update deprecated TRT api (#18834)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### Description
Update deprecated TRT APIs:
1. [setMaxWorkspaceSize](https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/classnvinfer1_1_1_i_builder_config.html#a8209999988ab480c60c8a905dfd2654d)(max_workspace_size_) --------> setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, max_workspace_size_)
2. [kENABLE_TACTIC_HEURISTIC](https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/namespacenvinfer1.html#abdc74c40fe7a0c3d05d2caeccfbc29c1a1215692ad24465e4d9e37a8a7fce1a38) --------> superseded by TRT builder optimization level 2
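As a reference for the two replacements, here is a minimal version-guarded sketch (the `ConfigureBuilder` helper is invented for illustration; the TensorRT calls and `NV_TENSORRT_*` macros are the real API, and the 8.4/8.6 cutoffs are assumptions based on the deprecation notes linked above):

```cpp
#include <cstddef>

#include "NvInfer.h"  // nvinfer1::IBuilderConfig, nvinfer1::MemoryPoolType, NV_TENSORRT_* macros

// Invented helper showing the version-guarded calls; not ORT code.
void ConfigureBuilder(nvinfer1::IBuilderConfig& config, std::size_t max_workspace_bytes,
                      int optimization_level) {
#if NV_TENSORRT_MAJOR > 8 || (NV_TENSORRT_MAJOR == 8 && NV_TENSORRT_MINOR >= 4)
  // Pooled-memory API; setMaxWorkspaceSize() is deprecated from TRT 8.4.
  config.setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, max_workspace_bytes);
#else
  config.setMaxWorkspaceSize(max_workspace_bytes);
#endif

#if NV_TENSORRT_MAJOR > 8 || (NV_TENSORRT_MAJOR == 8 && NV_TENSORRT_MINOR > 5)
  // TRT 8.6+: optimization level 2 subsumes the deprecated kENABLE_TACTIC_HEURISTIC flag.
  config.setBuilderOptimizationLevel(optimization_level);
#endif
}
```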
Perf & warning log comparison:

| TRT EP options | User will see corresponding warning logs: | Average inference time cost (FRCNN on A100) |
| -- | -- | -- |
| trt_build_heuristics_enable\|true | [TensorRT EP] trt_build_heuristics_enable is deprecated on TRT 8.6 onwards. Please set builder optimization level as 2 to enable builder heuristics. | ~300ms |
| trt_build_heuristics_enable\|true<br/>trt_builder_optimization_level\|2 | [TensorRT EP] Builder heuristics are enabled automatically by builder optimization level 2. trt_build_heuristics_enable is deprecated on TRT 8.6 onwards. | ~275ms |
| trt_builder_optimization_level\|2 | | ~275ms |
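The policy behind those logs, extracted as a tiny standalone sketch (`WarnAboutHeuristics` is an invented name; the messages paraphrase the logs added in the patch below):

```cpp
#include <iostream>

// Invented stand-in for the EP's logging decision on TRT 8.6+.
void WarnAboutHeuristics(bool build_heuristics_enable, int builder_optimization_level) {
  if (!build_heuristics_enable) return;  // nothing to warn about
  if (builder_optimization_level == 2) {
    std::cout << "[TensorRT EP] Builder heuristics are enabled automatically by builder "
                 "optimization level 2. trt_build_heuristics_enable is deprecated on TRT 8.6 onwards.\n";
  } else {
    std::cout << "[TensorRT EP] trt_build_heuristics_enable is deprecated on TRT 8.6 onwards. "
                 "Please set builder optimization level as 2 to enable builder heuristics.\n";
  }
}

int main() {
  WarnAboutHeuristics(true, 2);  // second table row: heuristics folded into optimization level 2
  WarnAboutHeuristics(true, 3);  // first table row: deprecated flag without level 2, asks user to switch
}
```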
### Motivation and Context
Prepare for the upcoming TRT 10.
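One detail worth calling out for TRT 10 readiness: the old guard `NV_TENSORRT_MINOR > 5 && NV_TENSORRT_MAJOR >= 8` rejects TRT 9.0 (its minor version 0 fails the first test), while the rewritten guard accepts every major version above 8. A standalone check of both predicates (values are illustrative):

```cpp
#include <iostream>

bool old_guard(int major, int minor) { return minor > 5 && major >= 8; }
bool new_guard(int major, int minor) { return (major == 8 && minor > 5) || major > 8; }

int main() {
  // TRT 8.6: both guards accept it.
  std::cout << old_guard(8, 6) << new_guard(8, 6) << "\n";  // prints 11
  // TRT 9.0: the old guard wrongly rejects it; the new one accepts it.
  std::cout << old_guard(9, 0) << new_guard(9, 0) << "\n";  // prints 01
}
```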
---
 .../tensorrt/tensorrt_execution_provider.cc | 25 +++++++++++++------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
index c4212bfc286f7..f31bea3adfe56 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
@@ -2506,7 +2506,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
     auto trt_config = std::unique_ptr<nvinfer1::IBuilderConfig>(trt_builder->createBuilderConfig());
     auto trt_parser = tensorrt_ptr::unique_pointer<nvonnxparser::IParser>(nvonnxparser::createParser(*trt_network, trt_logger));
     trt_parser->parse(string_buf.data(), string_buf.size(), model_path_);
-    trt_config->setMaxWorkspaceSize(max_workspace_size_);
+    trt_config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, max_workspace_size_);

     // Force Pow + Reduce ops in layer norm to run in FP32 to avoid overflow
     if (fp16_enable_ && layer_norm_fp32_fallback_) {
@@ -2723,13 +2723,24 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
       trt_config->setFlag(nvinfer1::BuilderFlag::kSPARSE_WEIGHTS);
       LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Sparse weights are allowed";
     }
-
-    // enable builder heuristics
+#if NV_TENSORRT_MAJOR == 8 && NV_TENSORRT_MINOR == 5
     if (build_heuristics_enable_) {
       trt_config->setFlag(nvinfer1::BuilderFlag::kENABLE_TACTIC_HEURISTIC);
-      LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Builder heuristics are enabled";
+      LOGS_DEFAULT(WARNING) << "[TensorRT EP] Builder heuristics are enabled."
+                            << " For TRT > 8.5, trt_build_heuristics_enable is deprecated, please set builder optimization level as 2 to enable builder heuristics.";
     }
-#if NV_TENSORRT_MINOR > 5 && NV_TENSORRT_MAJOR >= 8
+#elif NV_TENSORRT_MAJOR == 8 && NV_TENSORRT_MINOR > 5 || NV_TENSORRT_MAJOR > 8
+    // for TRT 8.6 onwards, heuristic-based tactic option is automatically enabled by setting builder optimization level 2
+    if (build_heuristics_enable_) {
+      if (builder_optimization_level_ == 2) {
+        LOGS_DEFAULT(WARNING) << "[TensorRT EP] Builder heuristics are automatically enabled by builder optimization level 2. trt_build_heuristics_enable is deprecated on TRT 8.6 onwards.";
+      } else {
+        LOGS_DEFAULT(WARNING) << "[TensorRT EP] trt_build_heuristics_enable is deprecated on TRT 8.6 onwards. Please set builder optimization level as 2 to enable builder heuristics.";
+      }
+    }
+#endif
+
+#if NV_TENSORRT_MAJOR == 8 && NV_TENSORRT_MINOR > 5 || NV_TENSORRT_MAJOR > 8
     // switch optimizaion level
     if (builder_optimization_level_ != 3) {
       trt_config->setBuilderOptimizationLevel(builder_optimization_level_);
@@ -3125,7 +3136,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
       trt_state->context->reset();
       trt_state->engine->reset();
       auto trt_config = std::unique_ptr<nvinfer1::IBuilderConfig>(trt_builder->createBuilderConfig());
-      trt_config->setMaxWorkspaceSize(*(trt_state->max_workspace_size_ptr));
+      trt_config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, *(trt_state->max_workspace_size_ptr));
       for (auto trt_profile : trt_profiles) {
         trt_config->addOptimizationProfile(trt_profile);
       }
@@ -3166,7 +3177,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
         trt_config->setFlag(nvinfer1::BuilderFlag::kENABLE_TACTIC_HEURISTIC);
         LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Builder heuristics are enabled";
       }
-#if NV_TENSORRT_MINOR > 5 && NV_TENSORRT_MAJOR >= 8
+#if NV_TENSORRT_MAJOR == 8 && NV_TENSORRT_MINOR > 5 || NV_TENSORRT_MAJOR > 8
       // switch optimizaion level
       if (trt_state->builder_optimization_level != 3) {
         trt_config->setBuilderOptimizationLevel(trt_state->builder_optimization_level);

From ea6186efa8e0fd9b1b62a8c392508af088e9df8e Mon Sep 17 00:00:00 2001
From: sophies927 <107952697+sophies927@users.noreply.github.com>
Date: Mon, 18 Dec 2023 09:57:33 -0800
Subject: [PATCH 5/5] Update stale.yml to correct close-issue-message (#18849)

### Description

### Motivation and Context

---
 .github/workflows/stale.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
index 95607f297c6bd..3ef5076583001 100644
--- a/.github/workflows/stale.yml
+++ b/.github/workflows/stale.yml
@@ -29,7 +29,7 @@ jobs:
         # Label you want to apply to issues that have been inactive for the amount of time specified by days-before-issue-stale
         stale-issue-label: "stale"
         # Comment that you want to add to issues that are labeled by the actions/stale action
-        stale-issue-message: "This issue has been automatically marked as stale due to inactivity and will be closed in 7 days if no further activity occurs. If further support is needed, please provide an update and/or more details."
+        stale-issue-message: "This issue has been automatically marked as stale due to inactivity and will be closed in 30 days if no further activity occurs. If further support is needed, please provide an update and/or more details."
         # Comment that you want to add to issues that are closed by the actions/stale action
         close-issue-message: "This issue has been automatically closed due to inactivity. Please reactivate if further support is needed."
         # If you never want this action to label PRs, set this value to -1