diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 95607f297c6bd..3ef5076583001 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -29,7 +29,7 @@ jobs: # Label you want to apply to issues that have been inactive for the amount of time specified by days-before-issue-stale stale-issue-label: "stale" # Comment that you want to add to issues that are labeled by the actions/stale action - stale-issue-message: "This issue has been automatically marked as stale due to inactivity and will be closed in 7 days if no further activity occurs. If further support is needed, please provide an update and/or more details." + stale-issue-message: "This issue has been automatically marked as stale due to inactivity and will be closed in 30 days if no further activity occurs. If further support is needed, please provide an update and/or more details." # Comment that you want to add to issues that are closed by the actions/stale action close-issue-message: "This issue has been automatically closed due to inactivity. Please reactivate if further support is needed." # If you never want this action to label PRs, set this value to -1 diff --git a/onnxruntime/core/framework/allocation_planner.cc b/onnxruntime/core/framework/allocation_planner.cc index 9556e056dedc0..ea7a6432a7507 100644 --- a/onnxruntime/core/framework/allocation_planner.cc +++ b/onnxruntime/core/framework/allocation_planner.cc @@ -1035,8 +1035,11 @@ class PlannerImpl { std::function dfs = [&](NodeIndex curr) { if (dependents.find(curr) == dependents.end()) { dependents.insert(curr); - for (NodeIndex dep : dependence_graph_[curr]) { - dfs(dep); + auto dep_graph_iter = dependence_graph_.find(curr); + if (dep_graph_iter != dependence_graph_.end()) { + for (NodeIndex dep : dep_graph_iter->second) { + dfs(dep); + } } } }; diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc index d489a59c4b798..baebe2420073b 100644 --- a/onnxruntime/core/graph/graph.cc +++ b/onnxruntime/core/graph/graph.cc @@ -582,6 +582,17 @@ bool Node::TryGetFunctionProto(ONNX_NAMESPACE::FunctionProto& onnx_function_prot onnx_function_proto = *func_template_->onnx_func_proto_; return true; } else if (op_) { + auto get_opset_version = [op = op_](Graph* graph) -> std::optional { + if (op->domain() == kOnnxDomain) { + const auto& domain_to_version = graph->DomainToVersionMap(); + const auto iter = domain_to_version.find(kOnnxDomain); + if (iter != domain_to_version.cend()) { + return iter->second; + } + } + return {}; + }; + // Check if this node has a schema defined function proto. if (op_->HasContextDependentFunction()) { NodeProto node_proto; @@ -595,8 +606,13 @@ bool Node::TryGetFunctionProto(ONNX_NAMESPACE::FunctionProto& onnx_function_prot } else input_types.emplace_back(); } + + auto requested_opset_version = get_opset_version(graph_); + if (!requested_opset_version.has_value()) { + requested_opset_version = SinceVersion(); + } ONNX_NAMESPACE::FunctionBodyBuildContextImpl function_body_ctx(node_proto, input_types); - return op_->BuildContextDependentFunction(function_body_ctx, onnx_function_proto); + return op_->BuildContextDependentFunction(function_body_ctx, onnx_function_proto, *requested_opset_version); } else if (op_->HasFunction()) { const FunctionProto* function_ptr = nullptr; // We need to get a function-body suitable for the ONNX opset used by the model. @@ -605,17 +621,12 @@ bool Node::TryGetFunctionProto(ONNX_NAMESPACE::FunctionProto& onnx_function_prot // as the default-version, which is incorrect in the case of functions belonging to // non-onnx domains, like MSDOMAIN. - // We use the following as a temporary hack. - function_ptr = op_->GetFunction(SinceVersion(), false); - - // TODO: Switch to following, once ONNX issue is fixed. - // auto& map = graph_->DomainToVersionMap(); - // const auto iter = map.find(kOnnxDomain); - // if (iter != map.end()) { - // function_ptr = op_->GetFunction(iter->second, true); - // } else { - // function_ptr = op_->GetFunction(); - // } + auto requested_opset_version = get_opset_version(graph_); + if (requested_opset_version.has_value()) { + function_ptr = op_->GetFunction(*requested_opset_version, true); + } else { + function_ptr = op_->GetFunction(SinceVersion(), false); + } if (function_ptr != nullptr) { onnx_function_proto = *function_ptr; diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index c4212bfc286f7..f31bea3adfe56 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -2506,7 +2506,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector(trt_builder->createBuilderConfig()); auto trt_parser = tensorrt_ptr::unique_pointer(nvonnxparser::createParser(*trt_network, trt_logger)); trt_parser->parse(string_buf.data(), string_buf.size(), model_path_); - trt_config->setMaxWorkspaceSize(max_workspace_size_); + trt_config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, max_workspace_size_); // Force Pow + Reduce ops in layer norm to run in FP32 to avoid overflow if (fp16_enable_ && layer_norm_fp32_fallback_) { @@ -2723,13 +2723,24 @@ common::Status TensorrtExecutionProvider::Compile(const std::vectorsetFlag(nvinfer1::BuilderFlag::kSPARSE_WEIGHTS); LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Sparse weights are allowed"; } - - // enable builder heuristics +#if NV_TENSORRT_MAJOR == 8 && NV_TENSORRT_MINOR == 5 if (build_heuristics_enable_) { trt_config->setFlag(nvinfer1::BuilderFlag::kENABLE_TACTIC_HEURISTIC); - LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Builder heuristics are enabled"; + LOGS_DEFAULT(WARNING) << "[TensorRT EP] Builder heuristics are enabled." + << " For TRT > 8.5, trt_build_heuristics_enable is deprecated, please set builder optimization level as 2 to enable builder heuristics."; } -#if NV_TENSORRT_MINOR > 5 && NV_TENSORRT_MAJOR >= 8 +#elif NV_TENSORRT_MAJOR == 8 && NV_TENSORRT_MINOR > 5 || NV_TENSORRT_MAJOR > 8 + // for TRT 8.6 onwards, heuristic-based tactic option is automatically enabled by setting builder optimization level 2 + if (build_heuristics_enable_) { + if (builder_optimization_level_ == 2) { + LOGS_DEFAULT(WARNING) << "[TensorRT EP] Builder heuristics are automatically enabled by builder optimization level 2. trt_build_heuristics_enable is deprecated on TRT 8.6 onwards."; + } else { + LOGS_DEFAULT(WARNING) << "[TensorRT EP] trt_build_heuristics_enable is deprecated on TRT 8.6 onwards. Please set builder optimization level as 2 to enable builder heuristics."; + } + } +#endif + +#if NV_TENSORRT_MAJOR == 8 && NV_TENSORRT_MINOR > 5 || NV_TENSORRT_MAJOR > 8 // switch optimizaion level if (builder_optimization_level_ != 3) { trt_config->setBuilderOptimizationLevel(builder_optimization_level_); @@ -3125,7 +3136,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vectorcontext->reset(); trt_state->engine->reset(); auto trt_config = std::unique_ptr(trt_builder->createBuilderConfig()); - trt_config->setMaxWorkspaceSize(*(trt_state->max_workspace_size_ptr)); + trt_config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, *(trt_state->max_workspace_size_ptr)); for (auto trt_profile : trt_profiles) { trt_config->addOptimizationProfile(trt_profile); } @@ -3166,7 +3177,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vectorsetFlag(nvinfer1::BuilderFlag::kENABLE_TACTIC_HEURISTIC); LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Builder heuristics are enabled"; } -#if NV_TENSORRT_MINOR > 5 && NV_TENSORRT_MAJOR >= 8 +#if NV_TENSORRT_MAJOR == 8 && NV_TENSORRT_MINOR > 5 || NV_TENSORRT_MAJOR > 8 // switch optimizaion level if (trt_state->builder_optimization_level != 3) { trt_config->setBuilderOptimizationLevel(trt_state->builder_optimization_level); diff --git a/onnxruntime/test/framework/function_test.cc b/onnxruntime/test/framework/function_test.cc index 9ab78cac3aca4..fa3545ef27d72 100644 --- a/onnxruntime/test/framework/function_test.cc +++ b/onnxruntime/test/framework/function_test.cc @@ -614,5 +614,59 @@ TEST(FunctionTest, TestInlinedFunctionDoesNotReserrectNonExistingArgs) { AsSpan(output_names), &fetches, 0)); } +/// +/// This test covers the issues: +/// https://github.com/microsoft/onnxruntime/issues/16438 +/// https://github.com/microsoft/onnxruntime/issues/18781 +/// +TEST(FunctionTest, Test_GH_issue_16438) { + const char* code = R"( + < + ir_version: 8, + opset_import: ["pkg.onnxscript.torch_lib" : 1, "" : 18], + producer_name: "pytorch", + producer_version: "2.1.0" + > + torch_jit (float16[5,10,5] input_0) => (double[5,10,5] _val_1) { + _val_1 = pkg.onnxscript.torch_lib.aten_special_log_softmax (input_0) + } + < + domain: "pkg.onnxscript.torch_lib", + opset_import: ["" : 18] + > + aten_special_log_softmax (self) => (result_8) + { + tmp = Shape(self) + tmp_0 = Size(tmp) + int64_0 = Constant () + int64_0_cast = CastLike(int64_0, tmp_0) + self_is_scalar = Equal(tmp_0, int64_0_cast) + self_4 = If(self_is_scalar) (self_2) { + tmp_1 = Constant () + self_2 = Unsqueeze(self, tmp_1) + }, else_branch : graph = elseGraph_8() => (self_3) { + self_3 = Identity(self) + }> + result = LogSoftmax(self_4) + result_5 = Cast(result) + result_8 = If(self_is_scalar) (result_6) { + result_6 = Squeeze(result_5) + }, else_branch : graph = elseGraph_12() => (result_7) { + result_7 = Identity(result_5) + }> + } + )"; + + std::string serialized_model; + ParseOnnxSource(code, serialized_model); + SessionOptions session_options; + InferenceSession session_object{session_options, GetEnvironment()}; + + std::stringstream sstr(serialized_model); + auto status = session_object.Load(sstr); + ASSERT_TRUE(status.IsOK()) << status.ErrorMessage(); + status = session_object.Initialize(); + ASSERT_TRUE(status.IsOK()) << status.ErrorMessage(); +} } // namespace test } // namespace onnxruntime diff --git a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml index fcf15778c7902..50ca6908520a9 100644 --- a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml +++ b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml @@ -242,6 +242,7 @@ stages: runTests: ${{ parameters.RunOnnxRuntimeTests }} buildJava: true java_artifact_id: onnxruntime_gpu + CudaVersion: 11.8 # CUDA with Tensorrt - template: templates/win-ci.yml @@ -253,10 +254,11 @@ stages: buildArch: x64 msbuildPlatform: x64 packageName: x64-tensorrt - buildparameter: --use_tensorrt --tensorrt_home="$(Agent.TempDirectory)\TensorRT-8.6.1.6.Windows10.x86_64.cuda-11.8" --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8" --enable_onnx_tests --enable_wcos --build_java --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" + buildparameter: --use_tensorrt --tensorrt_home="$(Agent.TempDirectory)\TensorRT-8.6.1.6.Windows10.x86_64.cuda-11.8" --cuda_home="$(Agent.TempDirectory)\v11.8" --enable_onnx_tests --enable_wcos --build_java --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" runTests: ${{ parameters.RunOnnxRuntimeTests }} buildJava: true java_artifact_id: onnxruntime_gpu + CudaVersion: 11.8 UseIncreasedTimeoutForTests: ${{ parameters.UseIncreasedTimeoutForTests }} # ROCm