diff --git a/bin/patches/ATD_ASO_full.patch b/bin/patches/ATD_ASO_full.patch index 7880dd34b..02e51a57c 100644 --- a/bin/patches/ATD_ASO_full.patch +++ b/bin/patches/ATD_ASO_full.patch @@ -1,10 +1,18 @@ -diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/include/clang/Driver/Options.td llvm-project/clang/include/clang/Driver/Options.td ---- llvm-project.orig/clang/include/clang/Driver/Options.td 2024-06-12 10:43:11.776219369 -0500 -+++ llvm-project/clang/include/clang/Driver/Options.td 2024-06-12 10:44:09.343614323 -0500 -@@ -6757,6 +6757,10 @@ - defm loop_versioning : BoolOptionWithoutMarshalling<"f", "version-loops-for-stride", - PosFlag, - NegFlag>; +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/clang/include/clang/Driver/Options.td llvm-project-aso/clang/include/clang/Driver/Options.td +--- llvm-project-aso-orig/clang/include/clang/Driver/Options.td 2024-11-23 20:25:26.659275825 -0600 ++++ llvm-project-aso/clang/include/clang/Driver/Options.td 2024-11-23 20:39:47.168175409 -0600 +@@ -7027,6 +7027,7 @@ + defm logical_abbreviations : OptInFC1FFlag<"logical-abbreviations", "Enable logical abbreviations">; + defm implicit_none : OptInFC1FFlag<"implicit-none", "No implicit typing allowed unless overridden by IMPLICIT statements">; + defm underscoring : OptInFC1FFlag<"underscoring", "Appends one trailing underscore to external names">; ++defm offload_global_filtering : OptInFC1FFlag<"offload-global-filtering", "Enable/disable OpenMP global filtering pass">; + defm ppc_native_vec_elem_order: BoolOptionWithoutMarshalling<"f", "ppc-native-vector-element-order", + PosFlag, + NegFlag>; +@@ -7043,6 +7044,10 @@ + + def fhermetic_module_files : Flag<["-"], "fhermetic-module-files">, Group, + HelpText<"Emit hermetic module files (no nested USE association)">; + +def do_concurrent_parallel_EQ : Joined<["-"], "fdo-concurrent-parallel=">, + HelpText<"Try to map `do concurrent` loops to OpenMP (on host or device)">, @@ -12,10 +20,19 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/include/clang/Driver/Optio } // let Visibility = [FC1Option, FlangOption] def J : JoinedOrSeparate<["-"], "J">, -diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp ---- llvm-project.orig/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp 2024-06-12 10:43:11.868218401 -0500 -+++ llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp 2024-06-12 10:44:09.343614323 -0500 -@@ -887,14 +887,13 @@ +@@ -8421,7 +8426,7 @@ + // CUDA Options + //===----------------------------------------------------------------------===// + +-let Visibility = [CC1Option] in { ++let Visibility = [CC1Option, FC1Option] in { + + def fcuda_is_device : Flag<["-"], "fcuda-is-device">, + HelpText<"Generate code for CUDA device">, +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp llvm-project-aso/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +--- llvm-project-aso-orig/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp 2024-11-23 20:25:26.707275652 -0600 ++++ llvm-project-aso/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp 2024-11-23 20:39:47.168175409 -0600 +@@ -862,14 +862,14 @@ void CGOpenMPRuntimeGPU::emitKernelInit(const OMPExecutableDirective &D, CodeGenFunction &CGF, EntryFunctionState &EST, bool IsSPMD) { @@ -25,8 +42,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/lib/CodeGen/CGOpenMPRuntim - MinTeamsVal, MaxTeamsVal); + // Get NumTeams and ThreadLimit attributes. 
+ llvm::OpenMPIRBuilder::TargetKernelDefaultBounds Bounds; -+ computeMinAndMaxThreadsAndTeams(D, CGF, Bounds.MinThreads, Bounds.MaxThreads, -+ Bounds.MinTeams, Bounds.MaxTeams); ++ computeMinAndMaxThreadsAndTeams( ++ D, CGF, Bounds.MinThreads, Bounds.MaxThreads.emplace_back(-1), ++ Bounds.MinTeams, Bounds.MaxTeams.emplace_back(-1)); CGBuilderTy &Bld = CGF.Builder; - Bld.restoreIP(OMPBuilder.createTargetInit( @@ -35,10 +53,27 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/lib/CodeGen/CGOpenMPRuntim if (!IsSPMD) emitGenericVarsProlog(CGF, EST.Loc); } -diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/lib/Driver/ToolChains/Clang.cpp llvm-project/clang/lib/Driver/ToolChains/Clang.cpp ---- llvm-project.orig/clang/lib/Driver/ToolChains/Clang.cpp 2024-06-12 10:43:11.904218022 -0500 -+++ llvm-project/clang/lib/Driver/ToolChains/Clang.cpp 2024-06-12 10:44:09.343614323 -0500 -@@ -8862,7 +8862,9 @@ +@@ -1889,7 +1889,6 @@ + return; + + bool ParallelReduction = isOpenMPParallelDirective(Options.ReductionKind); +- bool DistributeReduction = isOpenMPDistributeDirective(Options.ReductionKind); + bool TeamsReduction = isOpenMPTeamsDirective(Options.ReductionKind); + + ASTContext &C = CGM.getContext(); +@@ -1986,7 +1985,7 @@ + llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + OMPBuilder.createReductionsGPU( + OmpLoc, AllocaIP, CodeGenIP, ReductionInfos, false, TeamsReduction, +- DistributeReduction, llvm::OpenMPIRBuilder::ReductionGenCBKind::Clang, ++ llvm::OpenMPIRBuilder::ReductionGenCBKind::Clang, + CGF.getTarget().getGridValue(), + C.getLangOpts().OpenMPCUDAReductionBufNum, RTLoc); + assert(AfterIP && "unexpected error creating GPU reductions"); +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/clang/lib/Driver/ToolChains/Clang.cpp llvm-project-aso/clang/lib/Driver/ToolChains/Clang.cpp +--- llvm-project-aso-orig/clang/lib/Driver/ToolChains/Clang.cpp 2024-11-23 20:25:26.719275609 -0600 ++++ llvm-project-aso/clang/lib/Driver/ToolChains/Clang.cpp 2024-11-23 20:39:47.172175395 -0600 +@@ -9077,7 +9077,9 @@ assert(Input.isFilename() && "Invalid input."); CmdArgs.push_back(Input.getFilename()); @@ -49,10 +84,21 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/lib/Driver/ToolChains/Clan if (D.CC1Main && !D.CCGenDiagnostics) { // Invoke cc1as directly in this process. 
C.addCommand(std::make_unique( -diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/lib/Driver/ToolChains/Flang.cpp llvm-project/clang/lib/Driver/ToolChains/Flang.cpp ---- llvm-project.orig/clang/lib/Driver/ToolChains/Flang.cpp 2024-06-12 10:43:11.908217981 -0500 -+++ llvm-project/clang/lib/Driver/ToolChains/Flang.cpp 2024-06-12 10:44:09.343614323 -0500 -@@ -141,7 +141,8 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/clang/lib/Driver/ToolChains/Flang.cpp llvm-project-aso/clang/lib/Driver/ToolChains/Flang.cpp +--- llvm-project-aso-orig/clang/lib/Driver/ToolChains/Flang.cpp 2024-11-23 20:25:26.723275595 -0600 ++++ llvm-project-aso/clang/lib/Driver/ToolChains/Flang.cpp 2024-11-23 20:40:24.480040785 -0600 +@@ -120,7 +120,9 @@ + options::OPT_fintrinsic_modules_path, options::OPT_pedantic, + options::OPT_std_EQ, options::OPT_W_Joined, + options::OPT_fconvert_EQ, options::OPT_fpass_plugin_EQ, +- options::OPT_funderscoring, options::OPT_fno_underscoring}); ++ options::OPT_funderscoring, options::OPT_fno_underscoring, ++ options::OPT_foffload_global_filtering, ++ options::OPT_fno_offload_global_filtering}); + + llvm::codegenoptions::DebugInfoKind DebugInfoKind; + if (Args.hasArg(options::OPT_gN_Group)) { +@@ -150,7 +152,8 @@ options::OPT_flang_deprecated_no_hlfir, options::OPT_flang_experimental_integer_overflow, options::OPT_fno_ppc_native_vec_elem_order, @@ -62,9 +108,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/lib/Driver/ToolChains/Flan } void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const { -diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/ClangScanDeps/multiple-commands.c llvm-project/clang/test/ClangScanDeps/multiple-commands.c ---- llvm-project.orig/clang/test/ClangScanDeps/multiple-commands.c 2023-08-31 11:50:49.134212787 -0500 -+++ llvm-project/clang/test/ClangScanDeps/multiple-commands.c 2024-06-12 10:44:09.343614323 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/clang/test/ClangScanDeps/multiple-commands.c llvm-project-aso/clang/test/ClangScanDeps/multiple-commands.c +--- llvm-project-aso-orig/clang/test/ClangScanDeps/multiple-commands.c 2024-10-18 14:42:28.502433945 -0500 ++++ llvm-project-aso/clang/test/ClangScanDeps/multiple-commands.c 2024-11-23 20:39:47.172175395 -0600 @@ -134,7 +134,7 @@ // CHECK-NEXT: "{{.*}}tu_save_temps_module.o" // CHECK: "{{.*}}tu_save_temps_module.s" @@ -74,9 +120,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/ClangScanDeps/multipl // CHECK: "input-file": "[[PREFIX]]{{.}}tu_save_temps_module.c" // CHECK-NEXT: } // CHECK-NEXT: ] -diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/Driver/hip-target-id.hip llvm-project/clang/test/Driver/hip-target-id.hip ---- llvm-project.orig/clang/test/Driver/hip-target-id.hip 2024-06-12 10:43:12.324213607 -0500 -+++ llvm-project/clang/test/Driver/hip-target-id.hip 2024-06-12 10:44:09.347614281 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/clang/test/Driver/hip-target-id.hip llvm-project-aso/clang/test/Driver/hip-target-id.hip +--- llvm-project-aso-orig/clang/test/Driver/hip-target-id.hip 2024-08-27 20:36:24.320182191 -0500 ++++ llvm-project-aso/clang/test/Driver/hip-target-id.hip 2024-11-23 20:39:47.172175395 -0600 @@ -26,7 +26,7 @@ // CHECK-SAME: "-target-feature" "+sramecc" // CHECK-SAME: "-target-feature" "+xnack" @@ -86,9 +132,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/Driver/hip-target-id. 
// TMP-SAME: "-target-cpu" "gfx908" // TMP-SAME: "-target-feature" "+sramecc" // TMP-SAME: "-target-feature" "+xnack" -diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nested_parallel_for.c llvm-project/clang/test/OpenMP/irbuilder_nested_parallel_for.c ---- llvm-project.orig/clang/test/OpenMP/irbuilder_nested_parallel_for.c 2023-08-31 11:50:49.858235198 -0500 -+++ llvm-project/clang/test/OpenMP/irbuilder_nested_parallel_for.c 2024-06-12 10:44:09.347614281 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/clang/test/OpenMP/irbuilder_nested_parallel_for.c llvm-project-aso/clang/test/OpenMP/irbuilder_nested_parallel_for.c +--- llvm-project-aso-orig/clang/test/OpenMP/irbuilder_nested_parallel_for.c 2024-08-27 20:36:24.516180232 -0500 ++++ llvm-project-aso/clang/test/OpenMP/irbuilder_nested_parallel_for.c 2024-11-23 20:39:47.172175395 -0600 @@ -120,14 +120,14 @@ // CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0) // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4 @@ -194,14 +240,14 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK: omp.par.exit.split: -// CHECK-NEXT: store i32 0, ptr [[I185]], align 4 --// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[AGG_CAPTURED186]], i32 0, i32 0 +-// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_17]], ptr [[AGG_CAPTURED186]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[I185]], ptr [[TMP0]], align 8 --// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[AGG_CAPTURED187]], i32 0, i32 0 +-// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_18]], ptr [[AGG_CAPTURED187]], i32 0, i32 0 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I185]], align 4 +// CHECK-NEXT: store i32 0, ptr [[I191]], align 4 -+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[AGG_CAPTURED192]], i32 0, i32 0 ++// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_17]], ptr [[AGG_CAPTURED192]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I191]], ptr [[TMP0]], align 8 -+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[AGG_CAPTURED193]], i32 0, i32 0 ++// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_18]], ptr [[AGG_CAPTURED193]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I191]], align 4 // CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 -// CHECK-NEXT: call void @__captured_stmt.19(ptr [[DOTCOUNT_ADDR188]], ptr [[AGG_CAPTURED186]]) @@ -289,35 +335,35 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-NEXT: ret void // // -@@ -507,7 +507,7 @@ +@@ -507,11 +507,11 @@ // CHECK-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 // CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 -// CHECK-NEXT: [[STRUCTARG214:%.*]] = alloca { ptr, ptr, ptr }, align 8 +-// CHECK-NEXT: [[P_LASTITER178:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_LOWERBOUND179:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_UPPERBOUND180:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_STRIDE181:%.*]] = alloca i32, align 4 +// CHECK-NEXT: 
[[STRUCTARG221:%.*]] = alloca { ptr, ptr, ptr }, align 8 - // CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 - // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 - // CHECK-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 -@@ -520,14 +520,14 @@ ++// CHECK-NEXT: [[P_LASTITER183:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_LOWERBOUND184:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_UPPERBOUND185:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_STRIDE186:%.*]] = alloca i32, align 4 + // CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 - // CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 +@@ -524,10 +524,10 @@ + // CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 + // CHECK-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 + // CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[I160:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[AGG_CAPTURED161:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 -// CHECK-NEXT: [[AGG_CAPTURED162:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 -// CHECK-NEXT: [[DOTCOUNT_ADDR163:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_LASTITER178:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_LOWERBOUND179:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_UPPERBOUND180:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_STRIDE181:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[I165:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED166:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED167:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR168:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_LASTITER183:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_LOWERBOUND184:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_UPPERBOUND185:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_STRIDE186:%.*]] = alloca i32, align 4 // CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK: omp.par.region: // CHECK-NEXT: store i32 0, ptr [[I]], align 4 @@ -364,9 +410,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-NEXT: br label [[OMP_PAR_EXIT11_SPLIT:%.*]] // CHECK: omp.par.exit11.split: -// CHECK-NEXT: store i32 0, ptr [[I160]], align 4 --// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[AGG_CAPTURED161]], i32 0, i32 0 +-// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_15]], ptr [[AGG_CAPTURED161]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[I160]], ptr [[TMP10]], align 8 --// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[AGG_CAPTURED162]], i32 0, i32 0 +-// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_16]], ptr [[AGG_CAPTURED162]], i32 0, i32 0 -// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[I160]], align 4 -// CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4 -// CHECK-NEXT: call void @__captured_stmt.17(ptr [[DOTCOUNT_ADDR163]], ptr [[AGG_CAPTURED161]]) @@ -397,9 +443,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest -// CHECK-NEXT: br label [[OMP_LOOP_AFTER171:%.*]] -// CHECK: omp_loop.after171: +// CHECK-NEXT: store i32 0, ptr [[I165]], align 4 -+// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[AGG_CAPTURED166]], i32 0, i32 0 ++// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw 
[[STRUCT_ANON_15]], ptr [[AGG_CAPTURED166]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I165]], ptr [[TMP9]], align 8 -+// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[AGG_CAPTURED167]], i32 0, i32 0 ++// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_16]], ptr [[AGG_CAPTURED167]], i32 0, i32 0 +// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[I165]], align 4 +// CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-NEXT: call void @__captured_stmt.17(ptr [[DOTCOUNT_ADDR168]], ptr [[AGG_CAPTURED166]]) @@ -490,51 +536,52 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-NEXT: ret void // // -@@ -656,7 +656,7 @@ +@@ -656,16 +656,16 @@ // CHECK-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 // CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 -// CHECK-NEXT: [[STRUCTARG209:%.*]] = alloca { ptr, ptr, ptr }, align 8 +// CHECK-NEXT: [[STRUCTARG216:%.*]] = alloca { ptr, ptr, ptr }, align 8 // CHECK-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8 - // CHECK-NEXT: [[TID_ADDR_LOCAL12:%.*]] = alloca i32, align 4 - // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR6]], align 4 -@@ -670,22 +670,22 @@ +-// CHECK-NEXT: [[P_LASTITER153:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_LOWERBOUND154:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_UPPERBOUND155:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_STRIDE156:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_LASTITER93:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_LOWERBOUND94:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_UPPERBOUND95:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_STRIDE96:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_LASTITER157:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_LOWERBOUND158:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_UPPERBOUND159:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_STRIDE160:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_LASTITER95:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_LOWERBOUND96:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_UPPERBOUND97:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_STRIDE98:%.*]] = alloca i32, align 4 + // CHECK-NEXT: [[P_LASTITER34:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[P_LOWERBOUND35:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[P_UPPERBOUND36:%.*]] = alloca i32, align 4 - // CHECK-NEXT: [[P_STRIDE37:%.*]] = alloca i32, align 4 +@@ -678,14 +678,14 @@ + // CHECK-NEXT: [[AGG_CAPTURED17:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 + // CHECK-NEXT: [[AGG_CAPTURED18:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 + // CHECK-NEXT: [[DOTCOUNT_ADDR19:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[I75:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[AGG_CAPTURED76:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 -// CHECK-NEXT: [[AGG_CAPTURED77:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 -// CHECK-NEXT: [[DOTCOUNT_ADDR78:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_LASTITER93:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_LOWERBOUND94:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_UPPERBOUND95:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_STRIDE96:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[I135:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[AGG_CAPTURED136:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 -// CHECK-NEXT: [[AGG_CAPTURED137:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 
-// CHECK-NEXT: [[DOTCOUNT_ADDR138:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_LASTITER153:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_LOWERBOUND154:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_UPPERBOUND155:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_STRIDE156:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[I77:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED78:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED79:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR80:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_LASTITER95:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_LOWERBOUND96:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_UPPERBOUND97:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_STRIDE98:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[I139:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED140:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED141:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR142:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_LASTITER157:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_LOWERBOUND158:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_UPPERBOUND159:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_STRIDE160:%.*]] = alloca i32, align 4 // CHECK-NEXT: br label [[OMP_PAR_REGION9:%.*]] // CHECK: omp.par.region9: // CHECK-NEXT: store i32 0, ptr [[I16]], align 4 @@ -574,9 +621,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest -// CHECK-NEXT: br label [[OMP_PAR_EXIT46_SPLIT:%.*]] -// CHECK: omp.par.exit46.split: -// CHECK-NEXT: store i32 0, ptr [[I75]], align 4 --// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[AGG_CAPTURED76]], i32 0, i32 0 +-// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_9]], ptr [[AGG_CAPTURED76]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[I75]], ptr [[TMP10]], align 8 --// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[AGG_CAPTURED77]], i32 0, i32 0 +-// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_10]], ptr [[AGG_CAPTURED77]], i32 0, i32 0 -// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[I75]], align 4 -// CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4 -// CHECK-NEXT: call void @__captured_stmt.11(ptr [[DOTCOUNT_ADDR78]], ptr [[AGG_CAPTURED76]]) @@ -609,9 +656,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest +// CHECK-NEXT: br label [[OMP_PAR_EXIT47_SPLIT:%.*]] +// CHECK: omp.par.exit47.split: +// CHECK-NEXT: store i32 0, ptr [[I77]], align 4 -+// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[AGG_CAPTURED78]], i32 0, i32 0 ++// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_9]], ptr [[AGG_CAPTURED78]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I77]], ptr [[TMP9]], align 8 -+// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[AGG_CAPTURED79]], i32 0, i32 0 ++// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_10]], ptr [[AGG_CAPTURED79]], i32 0, i32 0 +// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[I77]], align 4 +// CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-NEXT: call void @__captured_stmt.11(ptr [[DOTCOUNT_ADDR80]], ptr [[AGG_CAPTURED78]]) @@ -637,9 +684,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest -// CHECK-NEXT: br label 
[[OMP_PAR_EXIT105_SPLIT:%.*]] -// CHECK: omp.par.exit105.split: -// CHECK-NEXT: store i32 0, ptr [[I135]], align 4 --// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[AGG_CAPTURED136]], i32 0, i32 0 +-// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_13]], ptr [[AGG_CAPTURED136]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[I135]], ptr [[TMP18]], align 8 --// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[AGG_CAPTURED137]], i32 0, i32 0 +-// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_14]], ptr [[AGG_CAPTURED137]], i32 0, i32 0 -// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[I135]], align 4 -// CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4 -// CHECK-NEXT: call void @__captured_stmt.15(ptr [[DOTCOUNT_ADDR138]], ptr [[AGG_CAPTURED136]]) @@ -702,9 +749,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest +// CHECK-NEXT: br label [[OMP_PAR_EXIT108_SPLIT:%.*]] +// CHECK: omp.par.exit108.split: +// CHECK-NEXT: store i32 0, ptr [[I139]], align 4 -+// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[AGG_CAPTURED140]], i32 0, i32 0 ++// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_13]], ptr [[AGG_CAPTURED140]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I139]], ptr [[TMP16]], align 8 -+// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[AGG_CAPTURED141]], i32 0, i32 0 ++// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_14]], ptr [[AGG_CAPTURED141]], i32 0, i32 0 +// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[I139]], align 4 +// CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4 +// CHECK-NEXT: call void @__captured_stmt.15(ptr [[DOTCOUNT_ADDR142]], ptr [[AGG_CAPTURED140]]) @@ -834,6 +881,10 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 // CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 +-// CHECK-NEXT: [[P_LASTITER128:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_LOWERBOUND129:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_UPPERBOUND130:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_STRIDE131:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[TID_ADDR_LOCAL106:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR100]], align 4 -// CHECK-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL106]], align 4 @@ -842,17 +893,17 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest -// CHECK-NEXT: [[AGG_CAPTURED111:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 -// CHECK-NEXT: [[AGG_CAPTURED112:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 -// CHECK-NEXT: [[DOTCOUNT_ADDR113:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_LASTITER128:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_LOWERBOUND129:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_UPPERBOUND130:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_STRIDE131:%.*]] = alloca i32, align 4 -// CHECK-NEXT: br label [[OMP_PAR_REGION103:%.*]] -// CHECK: omp.par.region103: -// CHECK-NEXT: store i32 0, ptr [[I110]], align 4 --// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[AGG_CAPTURED111]], i32 0, i32 0 +-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_11]], ptr 
[[AGG_CAPTURED111]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[I110]], ptr [[TMP2]], align 8 --// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[AGG_CAPTURED112]], i32 0, i32 0 +-// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_12]], ptr [[AGG_CAPTURED112]], i32 0, i32 0 -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I110]], align 4 ++// CHECK-NEXT: [[P_LASTITER131:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_LOWERBOUND132:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_UPPERBOUND133:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_STRIDE134:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TID_ADDR_LOCAL109:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR103]], align 4 +// CHECK-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL109]], align 4 @@ -861,16 +912,12 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest +// CHECK-NEXT: [[AGG_CAPTURED114:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED115:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR116:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_LASTITER131:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_LOWERBOUND132:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_UPPERBOUND133:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_STRIDE134:%.*]] = alloca i32, align 4 +// CHECK-NEXT: br label [[OMP_PAR_REGION106:%.*]] +// CHECK: omp.par.region106: +// CHECK-NEXT: store i32 0, ptr [[I113]], align 4 -+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[AGG_CAPTURED114]], i32 0, i32 0 ++// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_11]], ptr [[AGG_CAPTURED114]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I113]], ptr [[TMP2]], align 8 -+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[AGG_CAPTURED115]], i32 0, i32 0 ++// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_12]], ptr [[AGG_CAPTURED115]], i32 0, i32 0 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I113]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 -// CHECK-NEXT: call void @__captured_stmt.13(ptr [[DOTCOUNT_ADDR113]], ptr [[AGG_CAPTURED111]]) @@ -981,6 +1028,10 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 // CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 +-// CHECK-NEXT: [[P_LASTITER69:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_LOWERBOUND70:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_UPPERBOUND71:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_STRIDE72:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[TID_ADDR_LOCAL47:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR41]], align 4 -// CHECK-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL47]], align 4 @@ -989,17 +1040,17 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest -// CHECK-NEXT: [[AGG_CAPTURED52:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 -// CHECK-NEXT: [[AGG_CAPTURED53:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 -// CHECK-NEXT: [[DOTCOUNT_ADDR54:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_LASTITER69:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_LOWERBOUND70:%.*]] = alloca i32, align 4 --// CHECK-NEXT: 
[[P_UPPERBOUND71:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_STRIDE72:%.*]] = alloca i32, align 4 -// CHECK-NEXT: br label [[OMP_PAR_REGION44:%.*]] -// CHECK: omp.par.region44: -// CHECK-NEXT: store i32 0, ptr [[I51]], align 4 --// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[AGG_CAPTURED52]], i32 0, i32 0 +-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_7]], ptr [[AGG_CAPTURED52]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[I51]], ptr [[TMP2]], align 8 --// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED53]], i32 0, i32 0 +-// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED53]], i32 0, i32 0 -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I51]], align 4 ++// CHECK-NEXT: [[P_LASTITER70:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_LOWERBOUND71:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_UPPERBOUND72:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_STRIDE73:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TID_ADDR_LOCAL48:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR42]], align 4 +// CHECK-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL48]], align 4 @@ -1008,16 +1059,12 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest +// CHECK-NEXT: [[AGG_CAPTURED53:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED54:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR55:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_LASTITER70:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_LOWERBOUND71:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_UPPERBOUND72:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_STRIDE73:%.*]] = alloca i32, align 4 +// CHECK-NEXT: br label [[OMP_PAR_REGION45:%.*]] +// CHECK: omp.par.region45: +// CHECK-NEXT: store i32 0, ptr [[I52]], align 4 -+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[AGG_CAPTURED53]], i32 0, i32 0 ++// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_7]], ptr [[AGG_CAPTURED53]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I52]], ptr [[TMP2]], align 8 -+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED54]], i32 0, i32 0 ++// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED54]], i32 0, i32 0 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I52]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 -// CHECK-NEXT: call void @__captured_stmt.9(ptr [[DOTCOUNT_ADDR54]], ptr [[AGG_CAPTURED52]]) @@ -1146,125 +1193,125 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META40:![0-9]+]], metadata !DIExpression()), !dbg [[DBG41:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META39:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META39:![0-9]+]], !DIExpression(), [[META40:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META40:![0-9]+]], !DIExpression(), [[META41:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META42:![0-9]+]], metadata !DIExpression()), !dbg [[DBG41]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META41:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META41:![0-9]+]], !DIExpression(), [[META40]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META42:![0-9]+]], !DIExpression(), [[META41]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META43:![0-9]+]], metadata !DIExpression()), !dbg [[DBG45:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG46:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG46]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG46]] --// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG45]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META48:![0-9]+]], metadata !DIExpression()), !dbg [[DBG49:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META50:![0-9]+]], metadata !DIExpression()), !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG49]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META42:![0-9]+]], metadata !DIExpression()), !dbg [[DBG44:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG45:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG45]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG45]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG44]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META47:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META49:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG48]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META42:![0-9]+]], !DIExpression(), [[META44:![0-9]+]]) +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG45:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG45]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG45]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META44]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META47:![0-9]+]], !DIExpression(), [[META48:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META48]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META49:![0-9]+]], !DIExpression(), [[META48]]) +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META48]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META48]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META48]] +-// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META48]] +-// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META48]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META43:![0-9]+]], !DIExpression(), [[META45:![0-9]+]]) ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG46:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG46]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG46]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META45]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META48:![0-9]+]], !DIExpression(), [[META49:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META49]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META50:![0-9]+]], !DIExpression(), [[META49]]) ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META49]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META49]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META49]] ++// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META49]] ++// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META49]] // CHECK-DEBUG: cond.true: --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG49]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG48]] -+// 
CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG48]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META48]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META48]] +-// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META48]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META48]] +-// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META48]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META48]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META48]] +-// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META48]] +-// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META48]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META49]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META49]] ++// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META49]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META49]] ++// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META49]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META49]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META49]] ++// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META49]] ++// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META49]] // CHECK-DEBUG: cond.false: --// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG49]] -+// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG48]] +-// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META48]] ++// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META49]] // CHECK-DEBUG: cond.end: --// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG51:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG50:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META48]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META48]] +-// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META48]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG50:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META49]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META49]] ++// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META49]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG51:![0-9]+]] // // // CHECK-DEBUG-LABEL: define 
{{[^@]+}}@__captured_stmt.1 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG53:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG52:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG52:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG53:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META61:![0-9]+]], metadata !DIExpression()), !dbg [[DBG62:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META60:![0-9]+]], metadata !DIExpression()), !dbg [[DBG61:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META60:![0-9]+]], !DIExpression(), [[META61:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META61:![0-9]+]], !DIExpression(), [[META62:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META63:![0-9]+]], metadata !DIExpression()), !dbg [[DBG62]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META62:![0-9]+]], metadata !DIExpression()), !dbg [[DBG61]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META62:![0-9]+]], !DIExpression(), [[META61]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META63:![0-9]+]], !DIExpression(), [[META62]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META64:![0-9]+]], metadata !DIExpression()), !dbg [[DBG62]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META63:![0-9]+]], metadata !DIExpression()), !dbg [[DBG61]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META63:![0-9]+]], !DIExpression(), [[META61]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META64:![0-9]+]], !DIExpression(), [[META62]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG65:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG65]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG67:![0-9]+]] --// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG67]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG67]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG67]] --// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 
4, !dbg [[DBG62]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG65]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG64:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG64]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG66:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG66]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG66]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG66]] -+// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG61]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG64]] +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG64:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG64]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG66:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG66]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG66]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG66]] +-// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META61]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG64]] ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG65:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG65]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG67:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG67]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG67]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG67]] ++// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META62]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG65]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_1Pfid --// CHECK-DEBUG-SAME: (ptr noundef [[R:%.*]], i32 noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] !dbg [[DBG70:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef [[R:%.*]], i32 noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] !dbg [[DBG69:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef [[R:%.*]], i32 noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] !dbg [[DBG69:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef [[R:%.*]], i32 noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] !dbg [[DBG70:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[STRUCTARG17:%.*]] = alloca { ptr, ptr, ptr }, align 8 // CHECK-DEBUG-NEXT: [[R_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 // CHECK-DEBUG-NEXT: store ptr [[R]], ptr [[R_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[R_ADDR]], metadata [[META76:![0-9]+]], metadata !DIExpression()), !dbg [[DBG77:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[R_ADDR]], metadata [[META75:![0-9]+]], metadata !DIExpression()), !dbg [[DBG76:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[R_ADDR]], [[META75:![0-9]+]], !DIExpression(), [[META76:![0-9]+]]) ++// CHECK-DEBUG-NEXT: 
#dbg_declare(ptr [[R_ADDR]], [[META76:![0-9]+]], !DIExpression(), [[META77:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META78:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META77:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META77:![0-9]+]], !DIExpression(), [[META78:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META78:![0-9]+]], !DIExpression(), [[META79:![0-9]+]]) // CHECK-DEBUG-NEXT: store double [[B]], ptr [[B_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META80:![0-9]+]], metadata !DIExpression()), !dbg [[DBG81:![0-9]+]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]]), !dbg [[DBG82:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META79:![0-9]+]], metadata !DIExpression()), !dbg [[DBG80:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]]), !dbg [[DBG81:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META79:![0-9]+]], !DIExpression(), [[META80:![0-9]+]]) +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]]), !dbg [[DBG81:![0-9]+]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META80:![0-9]+]], !DIExpression(), [[META81:![0-9]+]]) ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]]), !dbg [[DBG82:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR18:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG17]], i32 0, i32 0 @@ -1272,19 +1319,19 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: store ptr [[B_ADDR]], ptr [[GEP_B_ADDR19]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR20:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG17]], i32 0, i32 2 // CHECK-DEBUG-NEXT: store ptr [[R_ADDR]], ptr [[GEP_R_ADDR20]], align 8 --// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB6]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par.4, ptr [[STRUCTARG17]]), !dbg [[DBG83:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB6]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par.4, ptr [[STRUCTARG17]]), !dbg [[DBG82:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB6]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par.4, ptr [[STRUCTARG17]]), !dbg [[DBG82:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB6]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par.4, ptr [[STRUCTARG17]]), !dbg [[DBG83:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16:%.*]] // CHECK-DEBUG: omp.par.outlined.exit16: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK-DEBUG: omp.par.exit.split: --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG85:![0-9]+]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG84:![0-9]+]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG84:![0-9]+]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG85:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_1Pfid..omp_par.4 --// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG86:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG85:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG85:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG86:![0-9]+]] { // CHECK-DEBUG-NEXT: omp.par.entry: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 // CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 @@ -1292,8 +1339,8 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK-DEBUG: omp.par.region: --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB8:[0-9]+]]), !dbg [[DBG87:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB8:[0-9]+]]), !dbg [[DBG86:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB8:[0-9]+]]), !dbg [[DBG86:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB8:[0-9]+]]), !dbg [[DBG87:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR1:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 @@ -1301,136 +1348,136 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: store ptr [[LOADGEP_B_ADDR]], ptr [[GEP_B_ADDR2]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR3:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2 // CHECK-DEBUG-NEXT: store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR3]], align 8 --// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB8]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG89:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB8]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG88:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB8]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG88:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB8]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG89:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] // CHECK-DEBUG: omp.par.outlined.exit: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT7_SPLIT:%.*]] // CHECK-DEBUG: omp.par.exit7.split: --// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG93:![0-9]+]] -+// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG92:![0-9]+]] +-// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG92:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG93:![0-9]+]] // CHECK-DEBUG: omp.par.region.parallel.after: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK-DEBUG: omp.par.pre_finalize: --// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16_EXITSTUB:%.*]], !dbg [[DBG93]] -+// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16_EXITSTUB:%.*]], !dbg [[DBG92]] +-// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16_EXITSTUB:%.*]], !dbg [[DBG92]] ++// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16_EXITSTUB:%.*]], !dbg [[DBG93]] // CHECK-DEBUG: omp.par.outlined.exit16.exitStub: // CHECK-DEBUG-NEXT: ret void // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_1Pfid..omp_par --// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR2:%.*]], ptr noalias [[ZERO_ADDR3:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG94:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR2:%.*]], ptr noalias [[ZERO_ADDR3:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG93:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR2:%.*]], ptr noalias [[ZERO_ADDR3:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG93:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR2:%.*]], ptr noalias [[ZERO_ADDR3:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG94:![0-9]+]] { // CHECK-DEBUG-NEXT: omp.par.entry4: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 // CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 @@ -1745,65 +1745,65 @@ - // CHECK-DEBUG-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 + // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION5:%.*]] // CHECK-DEBUG: omp.par.region5: --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META95:![0-9]+]], metadata !DIExpression()), !dbg [[DBG100:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[DBG100]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG101:![0-9]+]] --// CHECK-DEBUG-NEXT: store ptr [[I]], ptr [[TMP2]], align 8, !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED12]], i32 0, i32 0, !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG102:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.2(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG101]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META94:![0-9]+]], metadata !DIExpression()), !dbg 
[[DBG99:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[DBG99]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG100:![0-9]+]] -+// CHECK-DEBUG-NEXT: store ptr [[I]], ptr [[TMP2]], align 8, !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED12]], i32 0, i32 0, !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG101:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.2(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I]], [[META94:![0-9]+]], !DIExpression(), [[META99:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[META99]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG100:![0-9]+]] +-// CHECK-DEBUG-NEXT: store ptr [[I]], ptr [[TMP2]], align 8, !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED12]], i32 0, i32 0, !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG101:![0-9]+]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.2(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG100]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I]], [[META95:![0-9]+]], !DIExpression(), [[META100:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[META100]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG101:![0-9]+]] ++// CHECK-DEBUG-NEXT: store ptr [[I]], ptr [[TMP2]], align 8, !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED12]], i32 0, i32 0, !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG102:![0-9]+]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.2(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG101]] // CHECK-DEBUG: omp_loop.preheader: --// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM14:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB10:[0-9]+]]), !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB10]], i32 [[OMP_GLOBAL_THREAD_NUM14]], i32 34, ptr [[P_LASTITER]], ptr 
[[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG101]] -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM14:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB10:[0-9]+]]), !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB10]], i32 [[OMP_GLOBAL_THREAD_NUM14]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS1:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[TRIP_COUNT_MINUS1]], 1, !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM14:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB10:[0-9]+]]), !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB10]], i32 [[OMP_GLOBAL_THREAD_NUM14]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG100]] ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM14:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB10:[0-9]+]]), !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB10]], i32 [[OMP_GLOBAL_THREAD_NUM14]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG101]] 
++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS1:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[TRIP_COUNT_MINUS1]], 1, !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG101]] // CHECK-DEBUG: omp_loop.header: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG101]] -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG100]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG101]] // CHECK-DEBUG: omp_loop.cond: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP9]], !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG101]] -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP8]], !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP9]], !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG100]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP8]], !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG101]] // CHECK-DEBUG: omp_loop.exit: --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB10]], i32 [[OMP_GLOBAL_THREAD_NUM14]]), !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM15:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB10]]), !dbg [[DBG103:![0-9]+]] --// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB11:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM15]]), !dbg [[DBG103]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG101]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB10]], i32 [[OMP_GLOBAL_THREAD_NUM14]]), !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM15:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB10]]), !dbg [[DBG102:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB11:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM15]]), !dbg [[DBG102]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB10]], i32 [[OMP_GLOBAL_THREAD_NUM14]]), !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM15:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB10]]), !dbg [[DBG102:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB11:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM15]]), !dbg [[DBG102]] +-// CHECK-DEBUG-NEXT: br label 
[[OMP_LOOP_AFTER:%.*]], !dbg [[DBG100]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB10]], i32 [[OMP_GLOBAL_THREAD_NUM14]]), !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM15:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB10]]), !dbg [[DBG103:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB11:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM15]]), !dbg [[DBG103]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG101]] // CHECK-DEBUG: omp_loop.after: --// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION5_PARALLEL_AFTER:%.*]], !dbg [[DBG104:![0-9]+]] -+// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION5_PARALLEL_AFTER:%.*]], !dbg [[DBG103:![0-9]+]] +-// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION5_PARALLEL_AFTER:%.*]], !dbg [[DBG103:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION5_PARALLEL_AFTER:%.*]], !dbg [[DBG104:![0-9]+]] // CHECK-DEBUG: omp.par.region5.parallel.after: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE6:%.*]] // CHECK-DEBUG: omp.par.pre_finalize6: --// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG104]] -+// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG103]] +-// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG103]] ++// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG104]] // CHECK-DEBUG: omp_loop.body: --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP6]], !dbg [[DBG103]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.3(ptr [[I]], i32 [[TMP10]], ptr [[AGG_CAPTURED12]]), !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG105:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double, !dbg [[DBG105]] --// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG107:![0-9]+]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP12]], !dbg [[DBG108:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD]] to float, !dbg [[DBG105]] --// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG109:![0-9]+]] --// CHECK-DEBUG-NEXT: store float [[CONV13]], ptr [[TMP13]], align 4, !dbg [[DBG110:![0-9]+]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG101]] -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP6]], !dbg [[DBG102]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.3(ptr [[I]], i32 [[TMP9]], ptr [[AGG_CAPTURED12]]), !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG104:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP10]] to double, !dbg [[DBG104]] -+// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG106:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP11]], !dbg [[DBG107:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD]] to float, !dbg [[DBG104]] -+// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG108:![0-9]+]] -+// CHECK-DEBUG-NEXT: store float [[CONV13]], ptr [[TMP12]], align 4, !dbg [[DBG109:![0-9]+]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP6]], !dbg [[DBG102]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.3(ptr [[I]], i32 [[TMP10]], ptr [[AGG_CAPTURED12]]), !dbg 
[[DBG100]] +-// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG104:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double, !dbg [[DBG104]] +-// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG106:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP12]], !dbg [[DBG107:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD]] to float, !dbg [[DBG104]] +-// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG108:![0-9]+]] +-// CHECK-DEBUG-NEXT: store float [[CONV13]], ptr [[TMP13]], align 4, !dbg [[DBG109:![0-9]+]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG100]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP6]], !dbg [[DBG103]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.3(ptr [[I]], i32 [[TMP9]], ptr [[AGG_CAPTURED12]]), !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG105:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP10]] to double, !dbg [[DBG105]] ++// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG107:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP11]], !dbg [[DBG108:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD]] to float, !dbg [[DBG105]] ++// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG109:![0-9]+]] ++// CHECK-DEBUG-NEXT: store float [[CONV13]], ptr [[TMP12]], align 4, !dbg [[DBG110:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG101]] // CHECK-DEBUG: omp_loop.inc: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG101]] -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG100]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG101]] // CHECK-DEBUG: omp.par.outlined.exit.exitStub: // CHECK-DEBUG-NEXT: ret void // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.2 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG111:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG110:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG110:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG111:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -1438,109 +1485,109 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = 
alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META112:![0-9]+]], metadata !DIExpression()), !dbg [[DBG113:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META111:![0-9]+]], metadata !DIExpression()), !dbg [[DBG112:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META111:![0-9]+]], !DIExpression(), [[META112:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META112:![0-9]+]], !DIExpression(), [[META113:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META114:![0-9]+]], metadata !DIExpression()), !dbg [[DBG113]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META113:![0-9]+]], metadata !DIExpression()), !dbg [[DBG112]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META113:![0-9]+]], !DIExpression(), [[META112]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META114:![0-9]+]], !DIExpression(), [[META113]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META115:![0-9]+]], metadata !DIExpression()), !dbg [[DBG117:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG118:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG118]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG118]] --// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG117]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META120:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META122:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG121]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META114:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG117:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG117]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG117]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG116]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META119:![0-9]+]], metadata !DIExpression()), !dbg [[DBG120:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG120]] -+// 
CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META121:![0-9]+]], metadata !DIExpression()), !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG120]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META114:![0-9]+]], !DIExpression(), [[META116:![0-9]+]]) +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG117:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG117]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG117]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META116]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META119:![0-9]+]], !DIExpression(), [[META120:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META120]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META121:![0-9]+]], !DIExpression(), [[META120]]) +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META120]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META120]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META120]] +-// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META120]] +-// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META120]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META115:![0-9]+]], !DIExpression(), [[META117:![0-9]+]]) ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG118:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG118]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG118]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META117]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META120:![0-9]+]], !DIExpression(), [[META121:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META121]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META122:![0-9]+]], !DIExpression(), [[META121]]) ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META121]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META121]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META121]] ++// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META121]] ++// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META121]] // CHECK-DEBUG: cond.true: --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub 
i32 [[TMP8]], 1, !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG121]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG120]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META120]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META120]] +-// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META120]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META120]] +-// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META120]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META120]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META120]] +-// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META120]] +-// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META120]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META121]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META121]] ++// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META121]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META121]] ++// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META121]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META121]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META121]] ++// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META121]] ++// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META121]] // CHECK-DEBUG: cond.false: --// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG121]] -+// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG120]] +-// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META120]] ++// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META121]] // CHECK-DEBUG: cond.end: --// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG123:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg 
[[DBG120]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG122:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META120]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META120]] +-// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META120]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG122:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META121]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META121]] ++// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META121]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG123:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.3 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG125:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG124:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG124:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG125:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META126:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META125:![0-9]+]], metadata !DIExpression()), !dbg [[DBG126:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META125:![0-9]+]], !DIExpression(), [[META126:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META126:![0-9]+]], !DIExpression(), [[META127:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META128:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META127:![0-9]+]], metadata !DIExpression()), !dbg [[DBG126]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META127:![0-9]+]], !DIExpression(), [[META126]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META128:![0-9]+]], !DIExpression(), [[META127]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META129:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META128:![0-9]+]], metadata !DIExpression()), !dbg [[DBG126]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META128:![0-9]+]], 
!DIExpression(), [[META126]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META129:![0-9]+]], !DIExpression(), [[META127]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG130:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG130]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG132:![0-9]+]] --// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG132]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG132]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG132]] --// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG127]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG130]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG129:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG129]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG131:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG131]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG131]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG131]] -+// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG126]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG129]] +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG129:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG129]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG131:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG131]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG131]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG131]] +-// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META126]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG129]] ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG130:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG130]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG132:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG132]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG132]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG132]] ++// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META127]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG130]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_2Pfid --// CHECK-DEBUG-SAME: (ptr noundef [[R:%.*]], i32 noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] !dbg [[DBG133:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef [[R:%.*]], i32 noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] !dbg [[DBG132:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef [[R:%.*]], i32 noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] !dbg [[DBG132:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef 
[[R:%.*]], i32 noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] !dbg [[DBG133:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8 // CHECK-DEBUG-NEXT: [[R_ADDR:%.*]] = alloca ptr, align 8 @@ -1563,16 +1610,16 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND211:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE212:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[R]], ptr [[R_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[R_ADDR]], metadata [[META134:![0-9]+]], metadata !DIExpression()), !dbg [[DBG135:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[R_ADDR]], metadata [[META133:![0-9]+]], metadata !DIExpression()), !dbg [[DBG134:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[R_ADDR]], [[META133:![0-9]+]], !DIExpression(), [[META134:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[R_ADDR]], [[META134:![0-9]+]], !DIExpression(), [[META135:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META136:![0-9]+]], metadata !DIExpression()), !dbg [[DBG137:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META135:![0-9]+]], metadata !DIExpression()), !dbg [[DBG136:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META135:![0-9]+]], !DIExpression(), [[META136:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META136:![0-9]+]], !DIExpression(), [[META137:![0-9]+]]) // CHECK-DEBUG-NEXT: store double [[B]], ptr [[B_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META138:![0-9]+]], metadata !DIExpression()), !dbg [[DBG139:![0-9]+]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB13:[0-9]+]]), !dbg [[DBG140:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META137:![0-9]+]], metadata !DIExpression()), !dbg [[DBG138:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB13:[0-9]+]]), !dbg [[DBG139:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META137:![0-9]+]], !DIExpression(), [[META138:![0-9]+]]) +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB13:[0-9]+]]), !dbg [[DBG139:![0-9]+]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META138:![0-9]+]], !DIExpression(), [[META139:![0-9]+]]) ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB13:[0-9]+]]), !dbg [[DBG140:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 @@ -1580,216 +1627,216 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: store ptr [[B_ADDR]], ptr [[GEP_B_ADDR]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2 // CHECK-DEBUG-NEXT: store ptr [[R_ADDR]], ptr [[GEP_R_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB13]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.23, ptr [[STRUCTARG]]), !dbg [[DBG141:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB13]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.23, ptr [[STRUCTARG]]), !dbg [[DBG140:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184:%.*]] -// CHECK-DEBUG: omp.par.outlined.exit184: -+// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB13]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.23, ptr [[STRUCTARG]]), !dbg [[DBG140:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB13]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.23, ptr [[STRUCTARG]]), !dbg [[DBG141:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT190:%.*]] +// CHECK-DEBUG: omp.par.outlined.exit190: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK-DEBUG: omp.par.exit.split: --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I185]], metadata [[META145:![0-9]+]], metadata !DIExpression()), !dbg [[DBG148:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 0, ptr [[I185]], align 4, !dbg [[DBG148]] --// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[AGG_CAPTURED186]], i32 0, i32 0, !dbg [[DBG149:![0-9]+]] --// CHECK-DEBUG-NEXT: store ptr [[I185]], ptr [[TMP0]], align 8, !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[AGG_CAPTURED187]], i32 0, i32 0, !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[I185]], align 4, !dbg [[DBG150:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4, !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.19(ptr [[DOTCOUNT_ADDR188]], ptr [[AGG_CAPTURED186]]), !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: [[DOTCOUNT189:%.*]] = load i32, ptr [[DOTCOUNT_ADDR188]], align 4, !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER190:%.*]], !dbg [[DBG149]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I185]], [[META144:![0-9]+]], !DIExpression(), [[META147:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[I185]], align 4, !dbg [[META147]] +-// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_17]], ptr [[AGG_CAPTURED186]], i32 0, i32 0, !dbg [[DBG148:![0-9]+]] +-// CHECK-DEBUG-NEXT: store ptr [[I185]], ptr [[TMP0]], align 8, !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_18]], ptr [[AGG_CAPTURED187]], i32 0, i32 0, !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[I185]], align 4, !dbg [[DBG149:![0-9]+]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4, !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.19(ptr [[DOTCOUNT_ADDR188]], ptr [[AGG_CAPTURED186]]), !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: [[DOTCOUNT189:%.*]] = load i32, ptr [[DOTCOUNT_ADDR188]], align 4, !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER190:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.preheader190: --// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND204]], align 4, !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[P_UPPERBOUND205]], align 4, !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE206]], align 4, !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB42:[0-9]+]]), !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, ptr [[P_LASTITER203]], ptr [[P_LOWERBOUND204]], ptr [[P_UPPERBOUND205]], ptr [[P_STRIDE206]], i32 1, i32 0), !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[P_LOWERBOUND204]], align 4, !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[P_UPPERBOUND205]], align 4, !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191:%.*]], !dbg [[DBG149]] +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE206]], align 4, !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB42:[0-9]+]]), !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, ptr [[P_LASTITER203]], ptr [[P_LOWERBOUND204]], ptr [[P_UPPERBOUND205]], ptr [[P_STRIDE206]], i32 1, i32 0), !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.header191: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ], !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND192:%.*]], !dbg [[DBG149]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ], !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND192:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.cond192: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]], !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]], !dbg [[DBG149]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]], !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.body193: --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], !dbg [[DBG151:![0-9]+]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.20(ptr [[I185]], i32 [[TMP8]], ptr [[AGG_CAPTURED187]]), !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG152:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double, !dbg [[DBG152]] --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load double, ptr [[B_ADDR]], align 8, !dbg [[DBG151]] --// CHECK-DEBUG-NEXT: [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]], !dbg [[DBG153:![0-9]+]] --// 
CHECK-DEBUG-NEXT: [[CONV202:%.*]] = fptrunc double [[ADD201]] to float, !dbg [[DBG152]] --// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load ptr, ptr [[R_ADDR]], align 8, !dbg [[DBG154:![0-9]+]] --// CHECK-DEBUG-NEXT: store float [[CONV202]], ptr [[TMP11]], align 4, !dbg [[DBG155:![0-9]+]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC194]], !dbg [[DBG149]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], !dbg [[DBG150:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.20(ptr [[I185]], i32 [[TMP8]], ptr [[AGG_CAPTURED187]]), !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG151:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double, !dbg [[DBG151]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load double, ptr [[B_ADDR]], align 8, !dbg [[DBG150]] +-// CHECK-DEBUG-NEXT: [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]], !dbg [[DBG152:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV202:%.*]] = fptrunc double [[ADD201]] to float, !dbg [[DBG151]] +-// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load ptr, ptr [[R_ADDR]], align 8, !dbg [[DBG153:![0-9]+]] +-// CHECK-DEBUG-NEXT: store float [[CONV202]], ptr [[TMP11]], align 4, !dbg [[DBG154:![0-9]+]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC194]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.inc194: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1, !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191]], !dbg [[DBG149]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1, !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.exit195: --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]]), !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB42]]), !dbg [[DBG151]] --// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]), !dbg [[DBG151]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER196:%.*]], !dbg [[DBG149]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]]), !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB42]]), !dbg [[DBG150]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]), !dbg [[DBG150]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER196:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.after196: --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG156:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I191]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[I191]], align 4, !dbg [[DBG147]] -+// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[AGG_CAPTURED192]], i32 0, i32 0, !dbg [[DBG148:![0-9]+]] -+// CHECK-DEBUG-NEXT: store ptr [[I191]], ptr [[TMP0]], align 8, !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[AGG_CAPTURED193]], i32 0, i32 0, !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[I191]], align 4, !dbg [[DBG149:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4, !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.19(ptr 
[[DOTCOUNT_ADDR194]], ptr [[AGG_CAPTURED192]]), !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: [[DOTCOUNT195:%.*]] = load i32, ptr [[DOTCOUNT_ADDR194]], align 4, !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER196:%.*]], !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG155:![0-9]+]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I191]], [[META145:![0-9]+]], !DIExpression(), [[META148:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[I191]], align 4, !dbg [[META148]] ++// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_17]], ptr [[AGG_CAPTURED192]], i32 0, i32 0, !dbg [[DBG149:![0-9]+]] ++// CHECK-DEBUG-NEXT: store ptr [[I191]], ptr [[TMP0]], align 8, !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_18]], ptr [[AGG_CAPTURED193]], i32 0, i32 0, !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[I191]], align 4, !dbg [[DBG150:![0-9]+]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4, !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.19(ptr [[DOTCOUNT_ADDR194]], ptr [[AGG_CAPTURED192]]), !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: [[DOTCOUNT195:%.*]] = load i32, ptr [[DOTCOUNT_ADDR194]], align 4, !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER196:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG: omp_loop.preheader196: -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND210]], align 4, !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT195]], 1, !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[P_UPPERBOUND211]], align 4, !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE212]], align 4, !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM213:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB42:[0-9]+]]), !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM213]], i32 34, ptr [[P_LASTITER209]], ptr [[P_LOWERBOUND210]], ptr [[P_UPPERBOUND211]], ptr [[P_STRIDE212]], i32 1, i32 0), !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[P_LOWERBOUND210]], align 4, !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[P_UPPERBOUND211]], align 4, !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS1214:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = add i32 [[TRIP_COUNT_MINUS1214]], 1, !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER197:%.*]], !dbg [[DBG148]] ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND210]], align 4, !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT195]], 1, !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[P_UPPERBOUND211]], align 4, !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE212]], align 4, !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM213:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB42:[0-9]+]]), !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM213]], i32 34, ptr [[P_LASTITER209]], ptr [[P_LOWERBOUND210]], ptr [[P_UPPERBOUND211]], ptr [[P_STRIDE212]], i32 1, i32 0), !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[P_LOWERBOUND210]], align 4, !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[P_UPPERBOUND211]], align 4, !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS1214:%.*]] = sub i32 [[TMP5]], 
[[TMP4]], !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = add i32 [[TRIP_COUNT_MINUS1214]], 1, !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER197:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG: omp_loop.header197: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV203:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER196]] ], [ [[OMP_LOOP_NEXT205:%.*]], [[OMP_LOOP_INC200:%.*]] ], !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND198:%.*]], !dbg [[DBG148]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV203:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER196]] ], [ [[OMP_LOOP_NEXT205:%.*]], [[OMP_LOOP_INC200:%.*]] ], !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND198:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG: omp_loop.cond198: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP204:%.*]] = icmp ult i32 [[OMP_LOOP_IV203]], [[TMP6]], !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP204]], label [[OMP_LOOP_BODY199:%.*]], label [[OMP_LOOP_EXIT201:%.*]], !dbg [[DBG148]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP204:%.*]] = icmp ult i32 [[OMP_LOOP_IV203]], [[TMP6]], !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP204]], label [[OMP_LOOP_BODY199:%.*]], label [[OMP_LOOP_EXIT201:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG: omp_loop.body199: -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[OMP_LOOP_IV203]], [[TMP4]], !dbg [[DBG150:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.20(ptr [[I191]], i32 [[TMP7]], ptr [[AGG_CAPTURED193]]), !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG151:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV206:%.*]] = sitofp i32 [[TMP8]] to double, !dbg [[DBG151]] -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load double, ptr [[B_ADDR]], align 8, !dbg [[DBG150]] -+// CHECK-DEBUG-NEXT: [[ADD207:%.*]] = fadd double [[CONV206]], [[TMP9]], !dbg [[DBG152:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV208:%.*]] = fptrunc double [[ADD207]] to float, !dbg [[DBG151]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[R_ADDR]], align 8, !dbg [[DBG153:![0-9]+]] -+// CHECK-DEBUG-NEXT: store float [[CONV208]], ptr [[TMP10]], align 4, !dbg [[DBG154:![0-9]+]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC200]], !dbg [[DBG148]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[OMP_LOOP_IV203]], [[TMP4]], !dbg [[DBG151:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.20(ptr [[I191]], i32 [[TMP7]], ptr [[AGG_CAPTURED193]]), !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG152:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV206:%.*]] = sitofp i32 [[TMP8]] to double, !dbg [[DBG152]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load double, ptr [[B_ADDR]], align 8, !dbg [[DBG151]] ++// CHECK-DEBUG-NEXT: [[ADD207:%.*]] = fadd double [[CONV206]], [[TMP9]], !dbg [[DBG153:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV208:%.*]] = fptrunc double [[ADD207]] to float, !dbg [[DBG152]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[R_ADDR]], align 8, !dbg [[DBG154:![0-9]+]] ++// CHECK-DEBUG-NEXT: store float [[CONV208]], ptr [[TMP10]], align 4, !dbg [[DBG155:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC200]], !dbg [[DBG149]] +// CHECK-DEBUG: omp_loop.inc200: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT205]] = add nuw i32 [[OMP_LOOP_IV203]], 1, !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER197]], !dbg [[DBG148]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT205]] = add nuw i32 [[OMP_LOOP_IV203]], 1, !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER197]], !dbg [[DBG149]] +// CHECK-DEBUG: 
omp_loop.exit201: -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM213]]), !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM215:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB42]]), !dbg [[DBG150]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM215]]), !dbg [[DBG150]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER202:%.*]], !dbg [[DBG148]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM213]]), !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM215:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB42]]), !dbg [[DBG151]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM215]]), !dbg [[DBG151]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER202:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG: omp_loop.after202: -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG155:![0-9]+]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG156:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_2Pfid..omp_par.23 --// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG157:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG156:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG156:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG157:![0-9]+]] { // CHECK-DEBUG-NEXT: omp.par.entry: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 // CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 -@@ -1965,7 +1965,7 @@ +@@ -1965,11 +1965,11 @@ // CHECK-DEBUG-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[STRUCTARG214:%.*]] = alloca { ptr, ptr, ptr }, align 8 +-// CHECK-DEBUG-NEXT: [[P_LASTITER178:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_LOWERBOUND179:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_UPPERBOUND180:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_STRIDE181:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[STRUCTARG221:%.*]] = alloca { ptr, ptr, ptr }, align 8 - // CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 - // CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 - // CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 -@@ -1978,137 +1978,137 @@ ++// CHECK-DEBUG-NEXT: [[P_LASTITER183:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_LOWERBOUND184:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_UPPERBOUND185:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_STRIDE186:%.*]] = alloca i32, align 4 + // CHECK-DEBUG-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 - // CHECK-DEBUG-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 +@@ -1982,133 +1982,133 @@ + // CHECK-DEBUG-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 
8 + // CHECK-DEBUG-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 + // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[I160:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED161:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED162:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 -// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR163:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_LASTITER178:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_LOWERBOUND179:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_UPPERBOUND180:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_STRIDE181:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[I165:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED166:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED167:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 +// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR168:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_LASTITER183:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_LOWERBOUND184:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_UPPERBOUND185:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_STRIDE186:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK-DEBUG: omp.par.region: --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META158:![0-9]+]], metadata !DIExpression()), !dbg [[DBG162:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[DBG162]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG163:![0-9]+]] --// CHECK-DEBUG-NEXT: store ptr [[I]], ptr [[TMP2]], align 8, !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[AGG_CAPTURED1]], i32 0, i32 0, !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG164:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.5(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG163]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META157:![0-9]+]], metadata !DIExpression()), !dbg [[DBG161:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[DBG161]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG162:![0-9]+]] -+// CHECK-DEBUG-NEXT: store ptr [[I]], ptr [[TMP2]], align 8, !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[AGG_CAPTURED1]], i32 0, i32 0, !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG163:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.5(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I]], [[META157:![0-9]+]], !DIExpression(), 
[[META161:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[META161]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_3]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG162:![0-9]+]] +-// CHECK-DEBUG-NEXT: store ptr [[I]], ptr [[TMP2]], align 8, !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_4]], ptr [[AGG_CAPTURED1]], i32 0, i32 0, !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG163:![0-9]+]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.5(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG162]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I]], [[META158:![0-9]+]], !DIExpression(), [[META162:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[META162]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_3]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG163:![0-9]+]] ++// CHECK-DEBUG-NEXT: store ptr [[I]], ptr [[TMP2]], align 8, !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_4]], ptr [[AGG_CAPTURED1]], i32 0, i32 0, !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG164:![0-9]+]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.5(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG163]] // CHECK-DEBUG: omp_loop.preheader: --// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]]), !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG163]] -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]]), !dbg [[DBG162]] -+// 
CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS1:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[TRIP_COUNT_MINUS1]], 1, !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]]), !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG162]] ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]]), !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS1:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[TRIP_COUNT_MINUS1]], 1, !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG163]] // CHECK-DEBUG: omp_loop.header: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG163]] -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG162]] +-// 
CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG162]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG163]] // CHECK-DEBUG: omp_loop.cond: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP9]], !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG163]] -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP8]], !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP9]], !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG162]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP8]], !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG163]] // CHECK-DEBUG: omp_loop.exit: --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]), !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15]]), !dbg [[DBG165:![0-9]+]] --// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB16:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]), !dbg [[DBG165]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG163]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]), !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15]]), !dbg [[DBG164:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB16:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]), !dbg [[DBG164]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]), !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15]]), !dbg [[DBG164:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB16:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]), !dbg [[DBG164]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG162]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]), !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15]]), !dbg [[DBG165:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB16:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]), !dbg [[DBG165]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG163]] // CHECK-DEBUG: omp_loop.after: --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB18:[0-9]+]]), !dbg [[DBG166:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB18:[0-9]+]]), !dbg [[DBG165:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB18:[0-9]+]]), !dbg [[DBG165:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call 
i32 @__kmpc_global_thread_num(ptr @[[GLOB18:[0-9]+]]), !dbg [[DBG166:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: -// CHECK-DEBUG-NEXT: [[GEP_A_ADDR215:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG214]], i32 0, i32 0 @@ -1798,7 +1845,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest -// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_B_ADDR]], ptr [[GEP_B_ADDR216]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_R_ADDR217:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG214]], i32 0, i32 2 -// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR217]], align 8 --// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB18]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.22, ptr [[STRUCTARG214]]), !dbg [[DBG167:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB18]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.22, ptr [[STRUCTARG214]]), !dbg [[DBG166:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT159:%.*]] -// CHECK-DEBUG: omp.par.outlined.exit159: +// CHECK-DEBUG-NEXT: [[GEP_A_ADDR222:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG221]], i32 0, i32 0 @@ -1807,263 +1854,264 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest +// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_B_ADDR]], ptr [[GEP_B_ADDR223]], align 8 +// CHECK-DEBUG-NEXT: [[GEP_R_ADDR224:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG221]], i32 0, i32 2 +// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR224]], align 8 -+// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB18]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.22, ptr [[STRUCTARG221]]), !dbg [[DBG166:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB18]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.22, ptr [[STRUCTARG221]]), !dbg [[DBG167:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT164:%.*]] +// CHECK-DEBUG: omp.par.outlined.exit164: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT11_SPLIT:%.*]] // CHECK-DEBUG: omp.par.exit11.split: --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I160]], metadata [[META171:![0-9]+]], metadata !DIExpression()), !dbg [[DBG174:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 0, ptr [[I160]], align 4, !dbg [[DBG174]] --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[AGG_CAPTURED161]], i32 0, i32 0, !dbg [[DBG175:![0-9]+]] --// CHECK-DEBUG-NEXT: store ptr [[I160]], ptr [[TMP10]], align 8, !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[AGG_CAPTURED162]], i32 0, i32 0, !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load i32, ptr [[I160]], align 4, !dbg [[DBG176:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4, !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.17(ptr [[DOTCOUNT_ADDR163]], ptr [[AGG_CAPTURED161]]), !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: [[DOTCOUNT164:%.*]] = load i32, ptr [[DOTCOUNT_ADDR163]], align 4, !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER165:%.*]], !dbg [[DBG175]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I160]], [[META170:![0-9]+]], !DIExpression(), [[META173:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[I160]], align 4, !dbg [[META173]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_15]], ptr [[AGG_CAPTURED161]], i32 0, i32 0, !dbg [[DBG174:![0-9]+]] +-// CHECK-DEBUG-NEXT: store ptr [[I160]], ptr [[TMP10]], align 8, !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_16]], ptr [[AGG_CAPTURED162]], i32 0, i32 0, !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load i32, ptr [[I160]], align 4, !dbg [[DBG175:![0-9]+]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4, !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.17(ptr [[DOTCOUNT_ADDR163]], ptr [[AGG_CAPTURED161]]), !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: [[DOTCOUNT164:%.*]] = load i32, ptr [[DOTCOUNT_ADDR163]], align 4, !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER165:%.*]], !dbg [[DBG174]] -// CHECK-DEBUG: omp_loop.preheader165: --// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND179]], align 4, !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = sub i32 [[DOTCOUNT164]], 1, !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: store i32 [[TMP13]], ptr [[P_UPPERBOUND180]], align 4, !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE181]], align 4, !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM182:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB39:[0-9]+]]), !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB39]], i32 [[OMP_GLOBAL_THREAD_NUM182]], i32 34, ptr [[P_LASTITER178]], ptr [[P_LOWERBOUND179]], ptr [[P_UPPERBOUND180]], ptr [[P_STRIDE181]], i32 1, i32 0), !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: [[TMP14:%.*]] = load i32, ptr [[P_LOWERBOUND179]], align 4, !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: [[TMP15:%.*]] = load i32, ptr [[P_UPPERBOUND180]], align 4, !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], [[TMP14]], !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: [[TMP17:%.*]] = add i32 
[[TMP16]], 1, !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER166:%.*]], !dbg [[DBG175]] +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND179]], align 4, !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = sub i32 [[DOTCOUNT164]], 1, !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP13]], ptr [[P_UPPERBOUND180]], align 4, !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE181]], align 4, !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM182:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB39:[0-9]+]]), !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB39]], i32 [[OMP_GLOBAL_THREAD_NUM182]], i32 34, ptr [[P_LASTITER178]], ptr [[P_LOWERBOUND179]], ptr [[P_UPPERBOUND180]], ptr [[P_STRIDE181]], i32 1, i32 0), !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: [[TMP14:%.*]] = load i32, ptr [[P_LOWERBOUND179]], align 4, !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: [[TMP15:%.*]] = load i32, ptr [[P_UPPERBOUND180]], align 4, !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], [[TMP14]], !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], 1, !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER166:%.*]], !dbg [[DBG174]] -// CHECK-DEBUG: omp_loop.header166: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV172:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER165]] ], [ [[OMP_LOOP_NEXT174:%.*]], [[OMP_LOOP_INC169:%.*]] ], !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND167:%.*]], !dbg [[DBG175]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV172:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER165]] ], [ [[OMP_LOOP_NEXT174:%.*]], [[OMP_LOOP_INC169:%.*]] ], !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND167:%.*]], !dbg [[DBG174]] -// CHECK-DEBUG: omp_loop.cond167: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP173:%.*]] = icmp ult i32 [[OMP_LOOP_IV172]], [[TMP17]], !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP173]], label [[OMP_LOOP_BODY168:%.*]], label [[OMP_LOOP_EXIT170:%.*]], !dbg [[DBG175]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP173:%.*]] = icmp ult i32 [[OMP_LOOP_IV172]], [[TMP17]], !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP173]], label [[OMP_LOOP_BODY168:%.*]], label [[OMP_LOOP_EXIT170:%.*]], !dbg [[DBG174]] -// CHECK-DEBUG: omp_loop.exit170: --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB39]], i32 [[OMP_GLOBAL_THREAD_NUM182]]), !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM183:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB39]]), !dbg [[DBG177:![0-9]+]] --// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB40:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM183]]), !dbg [[DBG177]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER171:%.*]], !dbg [[DBG175]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB39]], i32 [[OMP_GLOBAL_THREAD_NUM182]]), !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM183:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB39]]), !dbg [[DBG176:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB40:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM183]]), !dbg [[DBG176]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER171:%.*]], !dbg [[DBG174]] -// CHECK-DEBUG: omp_loop.after171: --// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG178:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I165]], metadata [[META170:![0-9]+]], metadata !DIExpression()), !dbg [[DBG173:![0-9]+]] -+// CHECK-DEBUG-NEXT: store 
i32 0, ptr [[I165]], align 4, !dbg [[DBG173]] -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[AGG_CAPTURED166]], i32 0, i32 0, !dbg [[DBG174:![0-9]+]] -+// CHECK-DEBUG-NEXT: store ptr [[I165]], ptr [[TMP9]], align 8, !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[AGG_CAPTURED167]], i32 0, i32 0, !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[I165]], align 4, !dbg [[DBG175:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4, !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.17(ptr [[DOTCOUNT_ADDR168]], ptr [[AGG_CAPTURED166]]), !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: [[DOTCOUNT169:%.*]] = load i32, ptr [[DOTCOUNT_ADDR168]], align 4, !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER170:%.*]], !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG177:![0-9]+]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I165]], [[META171:![0-9]+]], !DIExpression(), [[META174:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[I165]], align 4, !dbg [[META174]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_15]], ptr [[AGG_CAPTURED166]], i32 0, i32 0, !dbg [[DBG175:![0-9]+]] ++// CHECK-DEBUG-NEXT: store ptr [[I165]], ptr [[TMP9]], align 8, !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_16]], ptr [[AGG_CAPTURED167]], i32 0, i32 0, !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[I165]], align 4, !dbg [[DBG176:![0-9]+]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4, !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.17(ptr [[DOTCOUNT_ADDR168]], ptr [[AGG_CAPTURED166]]), !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: [[DOTCOUNT169:%.*]] = load i32, ptr [[DOTCOUNT_ADDR168]], align 4, !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER170:%.*]], !dbg [[DBG175]] +// CHECK-DEBUG: omp_loop.preheader170: -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND184]], align 4, !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = sub i32 [[DOTCOUNT169]], 1, !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP12]], ptr [[P_UPPERBOUND185]], align 4, !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE186]], align 4, !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM187:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB39:[0-9]+]]), !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB39]], i32 [[OMP_GLOBAL_THREAD_NUM187]], i32 34, ptr [[P_LASTITER183]], ptr [[P_LOWERBOUND184]], ptr [[P_UPPERBOUND185]], ptr [[P_STRIDE186]], i32 1, i32 0), !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load i32, ptr [[P_LOWERBOUND184]], align 4, !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: [[TMP14:%.*]] = load i32, ptr [[P_UPPERBOUND185]], align 4, !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS1188:%.*]] = sub i32 [[TMP14]], [[TMP13]], !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: [[TMP15:%.*]] = add i32 [[TRIP_COUNT_MINUS1188]], 1, !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER171:%.*]], !dbg [[DBG174]] ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND184]], align 4, !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = sub i32 [[DOTCOUNT169]], 1, !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP12]], ptr [[P_UPPERBOUND185]], align 4, !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: store 
i32 1, ptr [[P_STRIDE186]], align 4, !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM187:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB39:[0-9]+]]), !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB39]], i32 [[OMP_GLOBAL_THREAD_NUM187]], i32 34, ptr [[P_LASTITER183]], ptr [[P_LOWERBOUND184]], ptr [[P_UPPERBOUND185]], ptr [[P_STRIDE186]], i32 1, i32 0), !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load i32, ptr [[P_LOWERBOUND184]], align 4, !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: [[TMP14:%.*]] = load i32, ptr [[P_UPPERBOUND185]], align 4, !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS1188:%.*]] = sub i32 [[TMP14]], [[TMP13]], !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: [[TMP15:%.*]] = add i32 [[TRIP_COUNT_MINUS1188]], 1, !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER171:%.*]], !dbg [[DBG175]] +// CHECK-DEBUG: omp_loop.header171: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV177:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER170]] ], [ [[OMP_LOOP_NEXT179:%.*]], [[OMP_LOOP_INC174:%.*]] ], !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND172:%.*]], !dbg [[DBG174]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV177:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER170]] ], [ [[OMP_LOOP_NEXT179:%.*]], [[OMP_LOOP_INC174:%.*]] ], !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND172:%.*]], !dbg [[DBG175]] +// CHECK-DEBUG: omp_loop.cond172: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP178:%.*]] = icmp ult i32 [[OMP_LOOP_IV177]], [[TMP15]], !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP178]], label [[OMP_LOOP_BODY173:%.*]], label [[OMP_LOOP_EXIT175:%.*]], !dbg [[DBG174]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP178:%.*]] = icmp ult i32 [[OMP_LOOP_IV177]], [[TMP15]], !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP178]], label [[OMP_LOOP_BODY173:%.*]], label [[OMP_LOOP_EXIT175:%.*]], !dbg [[DBG175]] +// CHECK-DEBUG: omp_loop.exit175: -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB39]], i32 [[OMP_GLOBAL_THREAD_NUM187]]), !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM189:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB39]]), !dbg [[DBG176:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB40:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM189]]), !dbg [[DBG176]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER176:%.*]], !dbg [[DBG174]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB39]], i32 [[OMP_GLOBAL_THREAD_NUM187]]), !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM189:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB39]]), !dbg [[DBG177:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB40:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM189]]), !dbg [[DBG177]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER176:%.*]], !dbg [[DBG175]] +// CHECK-DEBUG: omp_loop.after176: -+// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG177:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG178:![0-9]+]] // CHECK-DEBUG: omp.par.region.parallel.after: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK-DEBUG: omp.par.pre_finalize: --// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184_EXITSTUB:%.*]], !dbg [[DBG178]] +-// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184_EXITSTUB:%.*]], !dbg [[DBG177]] -// CHECK-DEBUG: omp_loop.body168: --// CHECK-DEBUG-NEXT: [[TMP18:%.*]] = add i32 [[OMP_LOOP_IV172]], [[TMP14]], !dbg [[DBG177]] 
--// CHECK-DEBUG-NEXT: call void @__captured_stmt.18(ptr [[I160]], i32 [[TMP18]], ptr [[AGG_CAPTURED162]]), !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: [[TMP19:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG179:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV175:%.*]] = sitofp i32 [[TMP19]] to double, !dbg [[DBG179]] --// CHECK-DEBUG-NEXT: [[TMP20:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG177]] --// CHECK-DEBUG-NEXT: [[ADD176:%.*]] = fadd double [[CONV175]], [[TMP20]], !dbg [[DBG180:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV177:%.*]] = fptrunc double [[ADD176]] to float, !dbg [[DBG179]] --// CHECK-DEBUG-NEXT: [[TMP21:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG181:![0-9]+]] --// CHECK-DEBUG-NEXT: store float [[CONV177]], ptr [[TMP21]], align 4, !dbg [[DBG182:![0-9]+]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC169]], !dbg [[DBG175]] +-// CHECK-DEBUG-NEXT: [[TMP18:%.*]] = add i32 [[OMP_LOOP_IV172]], [[TMP14]], !dbg [[DBG176]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.18(ptr [[I160]], i32 [[TMP18]], ptr [[AGG_CAPTURED162]]), !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: [[TMP19:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG178:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV175:%.*]] = sitofp i32 [[TMP19]] to double, !dbg [[DBG178]] +-// CHECK-DEBUG-NEXT: [[TMP20:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG176]] +-// CHECK-DEBUG-NEXT: [[ADD176:%.*]] = fadd double [[CONV175]], [[TMP20]], !dbg [[DBG179:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV177:%.*]] = fptrunc double [[ADD176]] to float, !dbg [[DBG178]] +-// CHECK-DEBUG-NEXT: [[TMP21:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG180:![0-9]+]] +-// CHECK-DEBUG-NEXT: store float [[CONV177]], ptr [[TMP21]], align 4, !dbg [[DBG181:![0-9]+]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC169]], !dbg [[DBG174]] -// CHECK-DEBUG: omp_loop.inc169: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT174]] = add nuw i32 [[OMP_LOOP_IV172]], 1, !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER166]], !dbg [[DBG175]] -+// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT190_EXITSTUB:%.*]], !dbg [[DBG177]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT174]] = add nuw i32 [[OMP_LOOP_IV172]], 1, !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER166]], !dbg [[DBG174]] ++// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT190_EXITSTUB:%.*]], !dbg [[DBG178]] +// CHECK-DEBUG: omp_loop.body173: -+// CHECK-DEBUG-NEXT: [[TMP16:%.*]] = add i32 [[OMP_LOOP_IV177]], [[TMP13]], !dbg [[DBG176]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.18(ptr [[I165]], i32 [[TMP16]], ptr [[AGG_CAPTURED167]]), !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: [[TMP17:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG178:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV180:%.*]] = sitofp i32 [[TMP17]] to double, !dbg [[DBG178]] -+// CHECK-DEBUG-NEXT: [[TMP18:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG176]] -+// CHECK-DEBUG-NEXT: [[ADD181:%.*]] = fadd double [[CONV180]], [[TMP18]], !dbg [[DBG179:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV182:%.*]] = fptrunc double [[ADD181]] to float, !dbg [[DBG178]] -+// CHECK-DEBUG-NEXT: [[TMP19:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG180:![0-9]+]] -+// CHECK-DEBUG-NEXT: store float [[CONV182]], ptr [[TMP19]], align 4, !dbg [[DBG181:![0-9]+]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC174]], !dbg [[DBG174]] ++// CHECK-DEBUG-NEXT: [[TMP16:%.*]] = add i32 [[OMP_LOOP_IV177]], [[TMP13]], !dbg [[DBG177]] ++// CHECK-DEBUG-NEXT: call 
void @__captured_stmt.18(ptr [[I165]], i32 [[TMP16]], ptr [[AGG_CAPTURED167]]), !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: [[TMP17:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG179:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV180:%.*]] = sitofp i32 [[TMP17]] to double, !dbg [[DBG179]] ++// CHECK-DEBUG-NEXT: [[TMP18:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG177]] ++// CHECK-DEBUG-NEXT: [[ADD181:%.*]] = fadd double [[CONV180]], [[TMP18]], !dbg [[DBG180:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV182:%.*]] = fptrunc double [[ADD181]] to float, !dbg [[DBG179]] ++// CHECK-DEBUG-NEXT: [[TMP19:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG181:![0-9]+]] ++// CHECK-DEBUG-NEXT: store float [[CONV182]], ptr [[TMP19]], align 4, !dbg [[DBG182:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC174]], !dbg [[DBG175]] +// CHECK-DEBUG: omp_loop.inc174: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT179]] = add nuw i32 [[OMP_LOOP_IV177]], 1, !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER171]], !dbg [[DBG174]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT179]] = add nuw i32 [[OMP_LOOP_IV177]], 1, !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER171]], !dbg [[DBG175]] // CHECK-DEBUG: omp_loop.body: --// CHECK-DEBUG-NEXT: [[TMP22:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP6]], !dbg [[DBG165]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.6(ptr [[I]], i32 [[TMP22]], ptr [[AGG_CAPTURED1]]), !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: [[TMP23:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG183:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP23]] to double, !dbg [[DBG183]] --// CHECK-DEBUG-NEXT: [[TMP24:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG165]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP24]], !dbg [[DBG184:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD]] to float, !dbg [[DBG183]] --// CHECK-DEBUG-NEXT: [[TMP25:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG185:![0-9]+]] --// CHECK-DEBUG-NEXT: store float [[CONV2]], ptr [[TMP25]], align 4, !dbg [[DBG186:![0-9]+]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG163]] -+// CHECK-DEBUG-NEXT: [[TMP20:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP6]], !dbg [[DBG164]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.6(ptr [[I]], i32 [[TMP20]], ptr [[AGG_CAPTURED1]]), !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: [[TMP21:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG182:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP21]] to double, !dbg [[DBG182]] -+// CHECK-DEBUG-NEXT: [[TMP22:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG164]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP22]], !dbg [[DBG183:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD]] to float, !dbg [[DBG182]] -+// CHECK-DEBUG-NEXT: [[TMP23:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG184:![0-9]+]] -+// CHECK-DEBUG-NEXT: store float [[CONV2]], ptr [[TMP23]], align 4, !dbg [[DBG185:![0-9]+]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: [[TMP22:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP6]], !dbg [[DBG164]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.6(ptr [[I]], i32 [[TMP22]], ptr [[AGG_CAPTURED1]]), !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: [[TMP23:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG182:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP23]] to double, !dbg 
[[DBG182]] +-// CHECK-DEBUG-NEXT: [[TMP24:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG164]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP24]], !dbg [[DBG183:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD]] to float, !dbg [[DBG182]] +-// CHECK-DEBUG-NEXT: [[TMP25:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG184:![0-9]+]] +-// CHECK-DEBUG-NEXT: store float [[CONV2]], ptr [[TMP25]], align 4, !dbg [[DBG185:![0-9]+]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG162]] ++// CHECK-DEBUG-NEXT: [[TMP20:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP6]], !dbg [[DBG165]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.6(ptr [[I]], i32 [[TMP20]], ptr [[AGG_CAPTURED1]]), !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: [[TMP21:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG183:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP21]] to double, !dbg [[DBG183]] ++// CHECK-DEBUG-NEXT: [[TMP22:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG165]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP22]], !dbg [[DBG184:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD]] to float, !dbg [[DBG183]] ++// CHECK-DEBUG-NEXT: [[TMP23:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG185:![0-9]+]] ++// CHECK-DEBUG-NEXT: store float [[CONV2]], ptr [[TMP23]], align 4, !dbg [[DBG186:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG163]] // CHECK-DEBUG: omp_loop.inc: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG163]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG162]] -// CHECK-DEBUG: omp.par.outlined.exit184.exitStub: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG162]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG163]] +// CHECK-DEBUG: omp.par.outlined.exit190.exitStub: // CHECK-DEBUG-NEXT: ret void // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_2Pfid..omp_par.22 --// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR6:%.*]], ptr noalias [[ZERO_ADDR7:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG187:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR6:%.*]], ptr noalias [[ZERO_ADDR7:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG186:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR6:%.*]], ptr noalias [[ZERO_ADDR7:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG186:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR6:%.*]], ptr noalias [[ZERO_ADDR7:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG187:![0-9]+]] { // CHECK-DEBUG-NEXT: omp.par.entry8: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 // CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 -@@ -2116,7 +2116,7 @@ +@@ -2116,16 +2116,16 @@ // CHECK-DEBUG-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[STRUCTARG209:%.*]] = alloca 
{ ptr, ptr, ptr }, align 8 +// CHECK-DEBUG-NEXT: [[STRUCTARG216:%.*]] = alloca { ptr, ptr, ptr }, align 8 // CHECK-DEBUG-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8 - // CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL12:%.*]] = alloca i32, align 4 - // CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR6]], align 4 -@@ -2130,59 +2130,59 @@ +-// CHECK-DEBUG-NEXT: [[P_LASTITER153:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_LOWERBOUND154:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_UPPERBOUND155:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_STRIDE156:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_LASTITER93:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_LOWERBOUND94:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_UPPERBOUND95:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_STRIDE96:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_LASTITER157:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_LOWERBOUND158:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_UPPERBOUND159:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_STRIDE160:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_LASTITER95:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_LOWERBOUND96:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_UPPERBOUND97:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_STRIDE98:%.*]] = alloca i32, align 4 + // CHECK-DEBUG-NEXT: [[P_LASTITER34:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[P_LOWERBOUND35:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[P_UPPERBOUND36:%.*]] = alloca i32, align 4 - // CHECK-DEBUG-NEXT: [[P_STRIDE37:%.*]] = alloca i32, align 4 +@@ -2138,51 +2138,51 @@ + // CHECK-DEBUG-NEXT: [[AGG_CAPTURED17:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 + // CHECK-DEBUG-NEXT: [[AGG_CAPTURED18:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 + // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR19:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[I75:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED76:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED77:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 -// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR78:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_LASTITER93:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_LOWERBOUND94:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_UPPERBOUND95:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_STRIDE96:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[I135:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED136:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED137:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 -// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR138:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_LASTITER153:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_LOWERBOUND154:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_UPPERBOUND155:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_STRIDE156:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[I77:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED78:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED79:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 +// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR80:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_LASTITER95:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_LOWERBOUND96:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: 
[[P_UPPERBOUND97:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_STRIDE98:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[I139:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED140:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED141:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 +// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR142:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_LASTITER157:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_LOWERBOUND158:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_UPPERBOUND159:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_STRIDE160:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION9:%.*]] // CHECK-DEBUG: omp.par.region9: --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I16]], metadata [[META188:![0-9]+]], metadata !DIExpression()), !dbg [[DBG193:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 0, ptr [[I16]], align 4, !dbg [[DBG193]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED17]], i32 0, i32 0, !dbg [[DBG194:![0-9]+]] --// CHECK-DEBUG-NEXT: store ptr [[I16]], ptr [[TMP2]], align 8, !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[AGG_CAPTURED18]], i32 0, i32 0, !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I16]], align 4, !dbg [[DBG195:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.7(ptr [[DOTCOUNT_ADDR19]], ptr [[AGG_CAPTURED17]]), !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: [[DOTCOUNT20:%.*]] = load i32, ptr [[DOTCOUNT_ADDR19]], align 4, !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER21:%.*]], !dbg [[DBG194]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I16]], metadata [[META187:![0-9]+]], metadata !DIExpression()), !dbg [[DBG192:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[I16]], align 4, !dbg [[DBG192]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED17]], i32 0, i32 0, !dbg [[DBG193:![0-9]+]] -+// CHECK-DEBUG-NEXT: store ptr [[I16]], ptr [[TMP2]], align 8, !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[AGG_CAPTURED18]], i32 0, i32 0, !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I16]], align 4, !dbg [[DBG194:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.7(ptr [[DOTCOUNT_ADDR19]], ptr [[AGG_CAPTURED17]]), !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: [[DOTCOUNT20:%.*]] = load i32, ptr [[DOTCOUNT_ADDR19]], align 4, !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER21:%.*]], !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I16]], [[META187:![0-9]+]], !DIExpression(), [[META192:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[I16]], align 4, !dbg [[META192]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED17]], i32 0, i32 0, !dbg [[DBG193:![0-9]+]] +-// CHECK-DEBUG-NEXT: store ptr [[I16]], ptr [[TMP2]], align 8, !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_6]], ptr [[AGG_CAPTURED18]], i32 0, i32 0, !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I16]], align 4, !dbg [[DBG194:![0-9]+]] +-// CHECK-DEBUG-NEXT: 
store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.7(ptr [[DOTCOUNT_ADDR19]], ptr [[AGG_CAPTURED17]]), !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: [[DOTCOUNT20:%.*]] = load i32, ptr [[DOTCOUNT_ADDR19]], align 4, !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER21:%.*]], !dbg [[DBG193]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I16]], [[META188:![0-9]+]], !DIExpression(), [[META193:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[I16]], align 4, !dbg [[META193]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED17]], i32 0, i32 0, !dbg [[DBG194:![0-9]+]] ++// CHECK-DEBUG-NEXT: store ptr [[I16]], ptr [[TMP2]], align 8, !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_6]], ptr [[AGG_CAPTURED18]], i32 0, i32 0, !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I16]], align 4, !dbg [[DBG195:![0-9]+]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.7(ptr [[DOTCOUNT_ADDR19]], ptr [[AGG_CAPTURED17]]), !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: [[DOTCOUNT20:%.*]] = load i32, ptr [[DOTCOUNT_ADDR19]], align 4, !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER21:%.*]], !dbg [[DBG194]] // CHECK-DEBUG: omp_loop.preheader21: --// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND35]], align 4, !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT20]], 1, !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND36]], align 4, !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE37]], align 4, !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM38:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20:[0-9]+]]), !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB20]], i32 [[OMP_GLOBAL_THREAD_NUM38]], i32 34, ptr [[P_LASTITER34]], ptr [[P_LOWERBOUND35]], ptr [[P_UPPERBOUND36]], ptr [[P_STRIDE37]], i32 1, i32 0), !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND35]], align 4, !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND36]], align 4, !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER22:%.*]], !dbg [[DBG194]] -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND35]], align 4, !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT20]], 1, !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND36]], align 4, !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE37]], align 4, !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM38:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20:[0-9]+]]), !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB20]], i32 [[OMP_GLOBAL_THREAD_NUM38]], i32 34, ptr [[P_LASTITER34]], ptr [[P_LOWERBOUND35]], ptr [[P_UPPERBOUND36]], ptr [[P_STRIDE37]], i32 1, i32 0), !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND35]], align 4, !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND36]], align 4, !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS139:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg 
[[DBG193]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[TRIP_COUNT_MINUS139]], 1, !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER22:%.*]], !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND35]], align 4, !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT20]], 1, !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND36]], align 4, !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE37]], align 4, !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM38:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20:[0-9]+]]), !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB20]], i32 [[OMP_GLOBAL_THREAD_NUM38]], i32 34, ptr [[P_LASTITER34]], ptr [[P_LOWERBOUND35]], ptr [[P_UPPERBOUND36]], ptr [[P_STRIDE37]], i32 1, i32 0), !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND35]], align 4, !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND36]], align 4, !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER22:%.*]], !dbg [[DBG193]] ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND35]], align 4, !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT20]], 1, !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND36]], align 4, !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE37]], align 4, !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM38:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20:[0-9]+]]), !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB20]], i32 [[OMP_GLOBAL_THREAD_NUM38]], i32 34, ptr [[P_LASTITER34]], ptr [[P_LOWERBOUND35]], ptr [[P_UPPERBOUND36]], ptr [[P_STRIDE37]], i32 1, i32 0), !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND35]], align 4, !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND36]], align 4, !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS139:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[TRIP_COUNT_MINUS139]], 1, !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER22:%.*]], !dbg [[DBG194]] // CHECK-DEBUG: omp_loop.header22: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV28:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER21]] ], [ [[OMP_LOOP_NEXT30:%.*]], [[OMP_LOOP_INC25:%.*]] ], !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND23:%.*]], !dbg [[DBG194]] -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV28:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER21]] ], [ [[OMP_LOOP_NEXT30:%.*]], [[OMP_LOOP_INC25:%.*]] ], !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND23:%.*]], !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV28:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER21]] ], [ [[OMP_LOOP_NEXT30:%.*]], [[OMP_LOOP_INC25:%.*]] ], !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND23:%.*]], !dbg [[DBG193]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV28:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER21]] ], [ [[OMP_LOOP_NEXT30:%.*]], [[OMP_LOOP_INC25:%.*]] ], !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND23:%.*]], !dbg [[DBG194]] // CHECK-DEBUG: omp_loop.cond23: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP29:%.*]] = icmp ult i32 [[OMP_LOOP_IV28]], 
[[TMP9]], !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP29]], label [[OMP_LOOP_BODY24:%.*]], label [[OMP_LOOP_EXIT26:%.*]], !dbg [[DBG194]] -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP29:%.*]] = icmp ult i32 [[OMP_LOOP_IV28]], [[TMP8]], !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP29]], label [[OMP_LOOP_BODY24:%.*]], label [[OMP_LOOP_EXIT26:%.*]], !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP29:%.*]] = icmp ult i32 [[OMP_LOOP_IV28]], [[TMP9]], !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP29]], label [[OMP_LOOP_BODY24:%.*]], label [[OMP_LOOP_EXIT26:%.*]], !dbg [[DBG193]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP29:%.*]] = icmp ult i32 [[OMP_LOOP_IV28]], [[TMP8]], !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP29]], label [[OMP_LOOP_BODY24:%.*]], label [[OMP_LOOP_EXIT26:%.*]], !dbg [[DBG194]] // CHECK-DEBUG: omp_loop.exit26: --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB20]], i32 [[OMP_GLOBAL_THREAD_NUM38]]), !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM39:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20]]), !dbg [[DBG196:![0-9]+]] --// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB21:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM39]]), !dbg [[DBG196]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER27:%.*]], !dbg [[DBG194]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB20]], i32 [[OMP_GLOBAL_THREAD_NUM38]]), !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM40:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20]]), !dbg [[DBG195:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB21:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM40]]), !dbg [[DBG195]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER27:%.*]], !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB20]], i32 [[OMP_GLOBAL_THREAD_NUM38]]), !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM39:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20]]), !dbg [[DBG195:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB21:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM39]]), !dbg [[DBG195]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER27:%.*]], !dbg [[DBG193]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB20]], i32 [[OMP_GLOBAL_THREAD_NUM38]]), !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM40:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20]]), !dbg [[DBG196:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB21:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM40]]), !dbg [[DBG196]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER27:%.*]], !dbg [[DBG194]] // CHECK-DEBUG: omp_loop.after27: --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM40:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB23:[0-9]+]]), !dbg [[DBG197:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB23:[0-9]+]]), !dbg [[DBG196:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM40:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB23:[0-9]+]]), !dbg [[DBG196:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB23:[0-9]+]]), !dbg [[DBG197:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR1:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 @@ -2071,47 +2119,47 @@ diff -Naur -x .git -x '*.pyc' 
llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: store ptr [[LOADGEP_B_ADDR]], ptr [[GEP_B_ADDR2]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR3:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2 // CHECK-DEBUG-NEXT: store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR3]], align 8 --// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB23]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG198:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB23]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG197:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB23]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG197:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB23]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG198:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] // CHECK-DEBUG: omp.par.outlined.exit: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT46_SPLIT:%.*]] -// CHECK-DEBUG: omp.par.exit46.split: --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I75]], metadata [[META202:![0-9]+]], metadata !DIExpression()), !dbg [[DBG205:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 0, ptr [[I75]], align 4, !dbg [[DBG205]] --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[AGG_CAPTURED76]], i32 0, i32 0, !dbg [[DBG206:![0-9]+]] --// CHECK-DEBUG-NEXT: store ptr [[I75]], ptr [[TMP10]], align 8, !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[AGG_CAPTURED77]], i32 0, i32 0, !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load i32, ptr [[I75]], align 4, !dbg [[DBG207:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4, !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.11(ptr [[DOTCOUNT_ADDR78]], ptr [[AGG_CAPTURED76]]), !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: [[DOTCOUNT79:%.*]] = load i32, ptr [[DOTCOUNT_ADDR78]], align 4, !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER80:%.*]], !dbg [[DBG206]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I75]], [[META201:![0-9]+]], !DIExpression(), [[META204:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[I75]], align 4, !dbg [[META204]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_9]], ptr [[AGG_CAPTURED76]], i32 0, i32 0, !dbg [[DBG205:![0-9]+]] +-// CHECK-DEBUG-NEXT: store ptr [[I75]], ptr [[TMP10]], align 8, !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_10]], ptr [[AGG_CAPTURED77]], i32 0, i32 0, !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load i32, ptr [[I75]], align 4, !dbg [[DBG206:![0-9]+]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4, !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.11(ptr [[DOTCOUNT_ADDR78]], ptr [[AGG_CAPTURED76]]), !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: [[DOTCOUNT79:%.*]] = load i32, ptr [[DOTCOUNT_ADDR78]], align 4, !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER80:%.*]], !dbg [[DBG205]] -// CHECK-DEBUG: omp_loop.preheader80: --// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND94]], align 4, !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = sub i32 [[DOTCOUNT79]], 1, !dbg [[DBG206]] --// 
CHECK-DEBUG-NEXT: store i32 [[TMP13]], ptr [[P_UPPERBOUND95]], align 4, !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE96]], align 4, !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM97:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB28:[0-9]+]]), !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB28]], i32 [[OMP_GLOBAL_THREAD_NUM97]], i32 34, ptr [[P_LASTITER93]], ptr [[P_LOWERBOUND94]], ptr [[P_UPPERBOUND95]], ptr [[P_STRIDE96]], i32 1, i32 0), !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: [[TMP14:%.*]] = load i32, ptr [[P_LOWERBOUND94]], align 4, !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: [[TMP15:%.*]] = load i32, ptr [[P_UPPERBOUND95]], align 4, !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], [[TMP14]], !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], 1, !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER81:%.*]], !dbg [[DBG206]] +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND94]], align 4, !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = sub i32 [[DOTCOUNT79]], 1, !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP13]], ptr [[P_UPPERBOUND95]], align 4, !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE96]], align 4, !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM97:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB28:[0-9]+]]), !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB28]], i32 [[OMP_GLOBAL_THREAD_NUM97]], i32 34, ptr [[P_LASTITER93]], ptr [[P_LOWERBOUND94]], ptr [[P_UPPERBOUND95]], ptr [[P_STRIDE96]], i32 1, i32 0), !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: [[TMP14:%.*]] = load i32, ptr [[P_LOWERBOUND94]], align 4, !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: [[TMP15:%.*]] = load i32, ptr [[P_UPPERBOUND95]], align 4, !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], [[TMP14]], !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], 1, !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER81:%.*]], !dbg [[DBG205]] -// CHECK-DEBUG: omp_loop.header81: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV87:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER80]] ], [ [[OMP_LOOP_NEXT89:%.*]], [[OMP_LOOP_INC84:%.*]] ], !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND82:%.*]], !dbg [[DBG206]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV87:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER80]] ], [ [[OMP_LOOP_NEXT89:%.*]], [[OMP_LOOP_INC84:%.*]] ], !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND82:%.*]], !dbg [[DBG205]] -// CHECK-DEBUG: omp_loop.cond82: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP88:%.*]] = icmp ult i32 [[OMP_LOOP_IV87]], [[TMP17]], !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP88]], label [[OMP_LOOP_BODY83:%.*]], label [[OMP_LOOP_EXIT85:%.*]], !dbg [[DBG206]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP88:%.*]] = icmp ult i32 [[OMP_LOOP_IV87]], [[TMP17]], !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP88]], label [[OMP_LOOP_BODY83:%.*]], label [[OMP_LOOP_EXIT85:%.*]], !dbg [[DBG205]] -// CHECK-DEBUG: omp_loop.exit85: --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB28]], i32 [[OMP_GLOBAL_THREAD_NUM97]]), !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM98:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB28]]), !dbg [[DBG208:![0-9]+]] --// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB29:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM98]]), !dbg [[DBG208]] --// 
CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER86:%.*]], !dbg [[DBG206]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB28]], i32 [[OMP_GLOBAL_THREAD_NUM97]]), !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM98:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB28]]), !dbg [[DBG207:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB29:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM98]]), !dbg [[DBG207]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER86:%.*]], !dbg [[DBG205]] -// CHECK-DEBUG: omp_loop.after86: --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM99:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB31:[0-9]+]]), !dbg [[DBG209:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM99:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB31:[0-9]+]]), !dbg [[DBG208:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL213:%.*]] -// CHECK-DEBUG: omp_parallel213: -// CHECK-DEBUG-NEXT: [[GEP_A_ADDR210:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG209]], i32 0, i32 0 @@ -2120,83 +2168,83 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest -// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_B_ADDR]], ptr [[GEP_B_ADDR211]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_R_ADDR212:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG209]], i32 0, i32 2 -// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR212]], align 8 --// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB31]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.21, ptr [[STRUCTARG209]]), !dbg [[DBG210:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB31]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.21, ptr [[STRUCTARG209]]), !dbg [[DBG209:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT134:%.*]] -// CHECK-DEBUG: omp.par.outlined.exit134: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT105_SPLIT:%.*]] -// CHECK-DEBUG: omp.par.exit105.split: --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I135]], metadata [[META214:![0-9]+]], metadata !DIExpression()), !dbg [[DBG217:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 0, ptr [[I135]], align 4, !dbg [[DBG217]] --// CHECK-DEBUG-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[AGG_CAPTURED136]], i32 0, i32 0, !dbg [[DBG218:![0-9]+]] --// CHECK-DEBUG-NEXT: store ptr [[I135]], ptr [[TMP18]], align 8, !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[AGG_CAPTURED137]], i32 0, i32 0, !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: [[TMP20:%.*]] = load i32, ptr [[I135]], align 4, !dbg [[DBG219:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4, !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.15(ptr [[DOTCOUNT_ADDR138]], ptr [[AGG_CAPTURED136]]), !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: [[DOTCOUNT139:%.*]] = load i32, ptr [[DOTCOUNT_ADDR138]], align 4, !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER140:%.*]], !dbg [[DBG218]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I135]], [[META213:![0-9]+]], !DIExpression(), [[META216:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[I135]], align 4, !dbg [[META216]] +-// CHECK-DEBUG-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_13]], ptr [[AGG_CAPTURED136]], i32 0, i32 0, !dbg [[DBG217:![0-9]+]] +-// CHECK-DEBUG-NEXT: store ptr [[I135]], ptr [[TMP18]], align 8, !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: [[TMP19:%.*]] = 
getelementptr inbounds nuw [[STRUCT_ANON_14]], ptr [[AGG_CAPTURED137]], i32 0, i32 0, !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: [[TMP20:%.*]] = load i32, ptr [[I135]], align 4, !dbg [[DBG218:![0-9]+]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4, !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.15(ptr [[DOTCOUNT_ADDR138]], ptr [[AGG_CAPTURED136]]), !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: [[DOTCOUNT139:%.*]] = load i32, ptr [[DOTCOUNT_ADDR138]], align 4, !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER140:%.*]], !dbg [[DBG217]] -// CHECK-DEBUG: omp_loop.preheader140: --// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND154]], align 4, !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: [[TMP21:%.*]] = sub i32 [[DOTCOUNT139]], 1, !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: store i32 [[TMP21]], ptr [[P_UPPERBOUND155]], align 4, !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE156]], align 4, !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM157:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB36:[0-9]+]]), !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM157]], i32 34, ptr [[P_LASTITER153]], ptr [[P_LOWERBOUND154]], ptr [[P_UPPERBOUND155]], ptr [[P_STRIDE156]], i32 1, i32 0), !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: [[TMP22:%.*]] = load i32, ptr [[P_LOWERBOUND154]], align 4, !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: [[TMP23:%.*]] = load i32, ptr [[P_UPPERBOUND155]], align 4, !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], [[TMP22]], !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: [[TMP25:%.*]] = add i32 [[TMP24]], 1, !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER141:%.*]], !dbg [[DBG218]] +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND154]], align 4, !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: [[TMP21:%.*]] = sub i32 [[DOTCOUNT139]], 1, !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP21]], ptr [[P_UPPERBOUND155]], align 4, !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE156]], align 4, !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM157:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB36:[0-9]+]]), !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM157]], i32 34, ptr [[P_LASTITER153]], ptr [[P_LOWERBOUND154]], ptr [[P_UPPERBOUND155]], ptr [[P_STRIDE156]], i32 1, i32 0), !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: [[TMP22:%.*]] = load i32, ptr [[P_LOWERBOUND154]], align 4, !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: [[TMP23:%.*]] = load i32, ptr [[P_UPPERBOUND155]], align 4, !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], [[TMP22]], !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: [[TMP25:%.*]] = add i32 [[TMP24]], 1, !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER141:%.*]], !dbg [[DBG217]] -// CHECK-DEBUG: omp_loop.header141: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV147:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER140]] ], [ [[OMP_LOOP_NEXT149:%.*]], [[OMP_LOOP_INC144:%.*]] ], !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND142:%.*]], !dbg [[DBG218]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV147:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER140]] ], [ [[OMP_LOOP_NEXT149:%.*]], [[OMP_LOOP_INC144:%.*]] ], !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND142:%.*]], !dbg [[DBG217]] -// CHECK-DEBUG: omp_loop.cond142: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP148:%.*]] = icmp ult i32 
[[OMP_LOOP_IV147]], [[TMP25]], !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP148]], label [[OMP_LOOP_BODY143:%.*]], label [[OMP_LOOP_EXIT145:%.*]], !dbg [[DBG218]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP148:%.*]] = icmp ult i32 [[OMP_LOOP_IV147]], [[TMP25]], !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP148]], label [[OMP_LOOP_BODY143:%.*]], label [[OMP_LOOP_EXIT145:%.*]], !dbg [[DBG217]] -// CHECK-DEBUG: omp_loop.exit145: --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM157]]), !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM158:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB36]]), !dbg [[DBG220:![0-9]+]] --// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB37:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM158]]), !dbg [[DBG220]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER146:%.*]], !dbg [[DBG218]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM157]]), !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM158:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB36]]), !dbg [[DBG219:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB37:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM158]]), !dbg [[DBG219]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER146:%.*]], !dbg [[DBG217]] -// CHECK-DEBUG: omp_loop.after146: --// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION9_PARALLEL_AFTER:%.*]], !dbg [[DBG221:![0-9]+]] +-// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION9_PARALLEL_AFTER:%.*]], !dbg [[DBG220:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT47_SPLIT:%.*]] +// CHECK-DEBUG: omp.par.exit47.split: -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I77]], metadata [[META201:![0-9]+]], metadata !DIExpression()), !dbg [[DBG204:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[I77]], align 4, !dbg [[DBG204]] -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[AGG_CAPTURED78]], i32 0, i32 0, !dbg [[DBG205:![0-9]+]] -+// CHECK-DEBUG-NEXT: store ptr [[I77]], ptr [[TMP9]], align 8, !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[AGG_CAPTURED79]], i32 0, i32 0, !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[I77]], align 4, !dbg [[DBG206:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4, !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.11(ptr [[DOTCOUNT_ADDR80]], ptr [[AGG_CAPTURED78]]), !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: [[DOTCOUNT81:%.*]] = load i32, ptr [[DOTCOUNT_ADDR80]], align 4, !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER82:%.*]], !dbg [[DBG205]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I77]], [[META202:![0-9]+]], !DIExpression(), [[META205:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[I77]], align 4, !dbg [[META205]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_9]], ptr [[AGG_CAPTURED78]], i32 0, i32 0, !dbg [[DBG206:![0-9]+]] ++// CHECK-DEBUG-NEXT: store ptr [[I77]], ptr [[TMP9]], align 8, !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_10]], ptr [[AGG_CAPTURED79]], i32 0, i32 0, !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[I77]], align 4, !dbg [[DBG207:![0-9]+]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4, !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.11(ptr 
[[DOTCOUNT_ADDR80]], ptr [[AGG_CAPTURED78]]), !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: [[DOTCOUNT81:%.*]] = load i32, ptr [[DOTCOUNT_ADDR80]], align 4, !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER82:%.*]], !dbg [[DBG206]] +// CHECK-DEBUG: omp_loop.preheader82: -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND96]], align 4, !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = sub i32 [[DOTCOUNT81]], 1, !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP12]], ptr [[P_UPPERBOUND97]], align 4, !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE98]], align 4, !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM99:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB28:[0-9]+]]), !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB28]], i32 [[OMP_GLOBAL_THREAD_NUM99]], i32 34, ptr [[P_LASTITER95]], ptr [[P_LOWERBOUND96]], ptr [[P_UPPERBOUND97]], ptr [[P_STRIDE98]], i32 1, i32 0), !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load i32, ptr [[P_LOWERBOUND96]], align 4, !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: [[TMP14:%.*]] = load i32, ptr [[P_UPPERBOUND97]], align 4, !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS1100:%.*]] = sub i32 [[TMP14]], [[TMP13]], !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: [[TMP15:%.*]] = add i32 [[TRIP_COUNT_MINUS1100]], 1, !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER83:%.*]], !dbg [[DBG205]] ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND96]], align 4, !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = sub i32 [[DOTCOUNT81]], 1, !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP12]], ptr [[P_UPPERBOUND97]], align 4, !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE98]], align 4, !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM99:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB28:[0-9]+]]), !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB28]], i32 [[OMP_GLOBAL_THREAD_NUM99]], i32 34, ptr [[P_LASTITER95]], ptr [[P_LOWERBOUND96]], ptr [[P_UPPERBOUND97]], ptr [[P_STRIDE98]], i32 1, i32 0), !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load i32, ptr [[P_LOWERBOUND96]], align 4, !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: [[TMP14:%.*]] = load i32, ptr [[P_UPPERBOUND97]], align 4, !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS1100:%.*]] = sub i32 [[TMP14]], [[TMP13]], !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: [[TMP15:%.*]] = add i32 [[TRIP_COUNT_MINUS1100]], 1, !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER83:%.*]], !dbg [[DBG206]] +// CHECK-DEBUG: omp_loop.header83: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV89:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER82]] ], [ [[OMP_LOOP_NEXT91:%.*]], [[OMP_LOOP_INC86:%.*]] ], !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND84:%.*]], !dbg [[DBG205]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV89:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER82]] ], [ [[OMP_LOOP_NEXT91:%.*]], [[OMP_LOOP_INC86:%.*]] ], !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND84:%.*]], !dbg [[DBG206]] +// CHECK-DEBUG: omp_loop.cond84: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP90:%.*]] = icmp ult i32 [[OMP_LOOP_IV89]], [[TMP15]], !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP90]], label [[OMP_LOOP_BODY85:%.*]], label [[OMP_LOOP_EXIT87:%.*]], !dbg [[DBG205]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP90:%.*]] = icmp ult i32 [[OMP_LOOP_IV89]], [[TMP15]], !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP90]], 
label [[OMP_LOOP_BODY85:%.*]], label [[OMP_LOOP_EXIT87:%.*]], !dbg [[DBG206]] +// CHECK-DEBUG: omp_loop.exit87: -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB28]], i32 [[OMP_GLOBAL_THREAD_NUM99]]), !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM101:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB28]]), !dbg [[DBG207:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB29:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM101]]), !dbg [[DBG207]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER88:%.*]], !dbg [[DBG205]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB28]], i32 [[OMP_GLOBAL_THREAD_NUM99]]), !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM101:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB28]]), !dbg [[DBG208:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB29:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM101]]), !dbg [[DBG208]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER88:%.*]], !dbg [[DBG206]] +// CHECK-DEBUG: omp_loop.after88: -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM102:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB31:[0-9]+]]), !dbg [[DBG208:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM102:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB31:[0-9]+]]), !dbg [[DBG209:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL220:%.*]] +// CHECK-DEBUG: omp_parallel220: +// CHECK-DEBUG-NEXT: [[GEP_A_ADDR217:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG216]], i32 0, i32 0 @@ -2205,142 +2253,142 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest +// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_B_ADDR]], ptr [[GEP_B_ADDR218]], align 8 +// CHECK-DEBUG-NEXT: [[GEP_R_ADDR219:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG216]], i32 0, i32 2 +// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR219]], align 8 -+// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB31]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.21, ptr [[STRUCTARG216]]), !dbg [[DBG209:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB31]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.21, ptr [[STRUCTARG216]]), !dbg [[DBG210:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT138:%.*]] +// CHECK-DEBUG: omp.par.outlined.exit138: +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT108_SPLIT:%.*]] +// CHECK-DEBUG: omp.par.exit108.split: -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I139]], metadata [[META213:![0-9]+]], metadata !DIExpression()), !dbg [[DBG216:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[I139]], align 4, !dbg [[DBG216]] -+// CHECK-DEBUG-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[AGG_CAPTURED140]], i32 0, i32 0, !dbg [[DBG217:![0-9]+]] -+// CHECK-DEBUG-NEXT: store ptr [[I139]], ptr [[TMP16]], align 8, !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[AGG_CAPTURED141]], i32 0, i32 0, !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: [[TMP18:%.*]] = load i32, ptr [[I139]], align 4, !dbg [[DBG218:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4, !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.15(ptr [[DOTCOUNT_ADDR142]], ptr [[AGG_CAPTURED140]]), !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: [[DOTCOUNT143:%.*]] = load i32, ptr [[DOTCOUNT_ADDR142]], align 4, !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER144:%.*]], !dbg [[DBG217]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I139]], [[META214:![0-9]+]], !DIExpression(), [[META217:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[I139]], align 4, !dbg [[META217]] ++// CHECK-DEBUG-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_13]], ptr [[AGG_CAPTURED140]], i32 0, i32 0, !dbg [[DBG218:![0-9]+]] ++// CHECK-DEBUG-NEXT: store ptr [[I139]], ptr [[TMP16]], align 8, !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_14]], ptr [[AGG_CAPTURED141]], i32 0, i32 0, !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: [[TMP18:%.*]] = load i32, ptr [[I139]], align 4, !dbg [[DBG219:![0-9]+]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4, !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.15(ptr [[DOTCOUNT_ADDR142]], ptr [[AGG_CAPTURED140]]), !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: [[DOTCOUNT143:%.*]] = load i32, ptr [[DOTCOUNT_ADDR142]], align 4, !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER144:%.*]], !dbg [[DBG218]] +// CHECK-DEBUG: omp_loop.preheader144: -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND158]], align 4, !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: [[TMP19:%.*]] = sub i32 [[DOTCOUNT143]], 1, !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP19]], ptr [[P_UPPERBOUND159]], align 4, !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE160]], align 4, !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM161:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB36:[0-9]+]]), !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM161]], i32 34, ptr [[P_LASTITER157]], ptr [[P_LOWERBOUND158]], ptr [[P_UPPERBOUND159]], ptr [[P_STRIDE160]], i32 1, i32 0), !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: [[TMP20:%.*]] = load i32, ptr [[P_LOWERBOUND158]], align 4, !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: [[TMP21:%.*]] = load i32, ptr [[P_UPPERBOUND159]], align 4, !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS1162:%.*]] = sub i32 [[TMP21]], [[TMP20]], !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: 
[[TMP22:%.*]] = add i32 [[TRIP_COUNT_MINUS1162]], 1, !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER145:%.*]], !dbg [[DBG217]] ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND158]], align 4, !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: [[TMP19:%.*]] = sub i32 [[DOTCOUNT143]], 1, !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP19]], ptr [[P_UPPERBOUND159]], align 4, !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE160]], align 4, !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM161:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB36:[0-9]+]]), !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM161]], i32 34, ptr [[P_LASTITER157]], ptr [[P_LOWERBOUND158]], ptr [[P_UPPERBOUND159]], ptr [[P_STRIDE160]], i32 1, i32 0), !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: [[TMP20:%.*]] = load i32, ptr [[P_LOWERBOUND158]], align 4, !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: [[TMP21:%.*]] = load i32, ptr [[P_UPPERBOUND159]], align 4, !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS1162:%.*]] = sub i32 [[TMP21]], [[TMP20]], !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: [[TMP22:%.*]] = add i32 [[TRIP_COUNT_MINUS1162]], 1, !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER145:%.*]], !dbg [[DBG218]] +// CHECK-DEBUG: omp_loop.header145: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV151:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER144]] ], [ [[OMP_LOOP_NEXT153:%.*]], [[OMP_LOOP_INC148:%.*]] ], !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND146:%.*]], !dbg [[DBG217]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV151:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER144]] ], [ [[OMP_LOOP_NEXT153:%.*]], [[OMP_LOOP_INC148:%.*]] ], !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND146:%.*]], !dbg [[DBG218]] +// CHECK-DEBUG: omp_loop.cond146: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP152:%.*]] = icmp ult i32 [[OMP_LOOP_IV151]], [[TMP22]], !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP152]], label [[OMP_LOOP_BODY147:%.*]], label [[OMP_LOOP_EXIT149:%.*]], !dbg [[DBG217]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP152:%.*]] = icmp ult i32 [[OMP_LOOP_IV151]], [[TMP22]], !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP152]], label [[OMP_LOOP_BODY147:%.*]], label [[OMP_LOOP_EXIT149:%.*]], !dbg [[DBG218]] +// CHECK-DEBUG: omp_loop.exit149: -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM161]]), !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM163:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB36]]), !dbg [[DBG219:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB37:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM163]]), !dbg [[DBG219]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER150:%.*]], !dbg [[DBG217]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM161]]), !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM163:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB36]]), !dbg [[DBG220:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB37:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM163]]), !dbg [[DBG220]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER150:%.*]], !dbg [[DBG218]] +// CHECK-DEBUG: omp_loop.after150: -+// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION9_PARALLEL_AFTER:%.*]], !dbg [[DBG220:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION9_PARALLEL_AFTER:%.*]], !dbg [[DBG221:![0-9]+]] // CHECK-DEBUG: 
omp.par.region9.parallel.after: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE10:%.*]] // CHECK-DEBUG: omp.par.pre_finalize10: --// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT159_EXITSTUB:%.*]], !dbg [[DBG221]] +-// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT159_EXITSTUB:%.*]], !dbg [[DBG220]] -// CHECK-DEBUG: omp_loop.body143: --// CHECK-DEBUG-NEXT: [[TMP26:%.*]] = add i32 [[OMP_LOOP_IV147]], [[TMP22]], !dbg [[DBG220]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.16(ptr [[I135]], i32 [[TMP26]], ptr [[AGG_CAPTURED137]]), !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: [[TMP27:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG222:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV150:%.*]] = sitofp i32 [[TMP27]] to double, !dbg [[DBG222]] --// CHECK-DEBUG-NEXT: [[TMP28:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG220]] --// CHECK-DEBUG-NEXT: [[ADD151:%.*]] = fadd double [[CONV150]], [[TMP28]], !dbg [[DBG223:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV152:%.*]] = fptrunc double [[ADD151]] to float, !dbg [[DBG222]] --// CHECK-DEBUG-NEXT: [[TMP29:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG224:![0-9]+]] --// CHECK-DEBUG-NEXT: store float [[CONV152]], ptr [[TMP29]], align 4, !dbg [[DBG225:![0-9]+]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC144]], !dbg [[DBG218]] +-// CHECK-DEBUG-NEXT: [[TMP26:%.*]] = add i32 [[OMP_LOOP_IV147]], [[TMP22]], !dbg [[DBG219]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.16(ptr [[I135]], i32 [[TMP26]], ptr [[AGG_CAPTURED137]]), !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: [[TMP27:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG221:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV150:%.*]] = sitofp i32 [[TMP27]] to double, !dbg [[DBG221]] +-// CHECK-DEBUG-NEXT: [[TMP28:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG219]] +-// CHECK-DEBUG-NEXT: [[ADD151:%.*]] = fadd double [[CONV150]], [[TMP28]], !dbg [[DBG222:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV152:%.*]] = fptrunc double [[ADD151]] to float, !dbg [[DBG221]] +-// CHECK-DEBUG-NEXT: [[TMP29:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG223:![0-9]+]] +-// CHECK-DEBUG-NEXT: store float [[CONV152]], ptr [[TMP29]], align 4, !dbg [[DBG224:![0-9]+]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC144]], !dbg [[DBG217]] -// CHECK-DEBUG: omp_loop.inc144: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT149]] = add nuw i32 [[OMP_LOOP_IV147]], 1, !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER141]], !dbg [[DBG218]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT149]] = add nuw i32 [[OMP_LOOP_IV147]], 1, !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER141]], !dbg [[DBG217]] -// CHECK-DEBUG: omp_loop.body83: --// CHECK-DEBUG-NEXT: [[TMP30:%.*]] = add i32 [[OMP_LOOP_IV87]], [[TMP14]], !dbg [[DBG208]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.12(ptr [[I75]], i32 [[TMP30]], ptr [[AGG_CAPTURED77]]), !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: [[TMP31:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG226:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV90:%.*]] = sitofp i32 [[TMP31]] to double, !dbg [[DBG226]] --// CHECK-DEBUG-NEXT: [[TMP32:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG208]] --// CHECK-DEBUG-NEXT: [[ADD91:%.*]] = fadd double [[CONV90]], [[TMP32]], !dbg [[DBG227:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV92:%.*]] = fptrunc double [[ADD91]] to float, !dbg [[DBG226]] --// CHECK-DEBUG-NEXT: [[TMP33:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG228:![0-9]+]] --// CHECK-DEBUG-NEXT: store float 
[[CONV92]], ptr [[TMP33]], align 4, !dbg [[DBG229:![0-9]+]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC84]], !dbg [[DBG206]] +-// CHECK-DEBUG-NEXT: [[TMP30:%.*]] = add i32 [[OMP_LOOP_IV87]], [[TMP14]], !dbg [[DBG207]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.12(ptr [[I75]], i32 [[TMP30]], ptr [[AGG_CAPTURED77]]), !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: [[TMP31:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG225:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV90:%.*]] = sitofp i32 [[TMP31]] to double, !dbg [[DBG225]] +-// CHECK-DEBUG-NEXT: [[TMP32:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG207]] +-// CHECK-DEBUG-NEXT: [[ADD91:%.*]] = fadd double [[CONV90]], [[TMP32]], !dbg [[DBG226:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV92:%.*]] = fptrunc double [[ADD91]] to float, !dbg [[DBG225]] +-// CHECK-DEBUG-NEXT: [[TMP33:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG227:![0-9]+]] +-// CHECK-DEBUG-NEXT: store float [[CONV92]], ptr [[TMP33]], align 4, !dbg [[DBG228:![0-9]+]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC84]], !dbg [[DBG205]] -// CHECK-DEBUG: omp_loop.inc84: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT89]] = add nuw i32 [[OMP_LOOP_IV87]], 1, !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER81]], !dbg [[DBG206]] -+// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT164_EXITSTUB:%.*]], !dbg [[DBG220]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT89]] = add nuw i32 [[OMP_LOOP_IV87]], 1, !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER81]], !dbg [[DBG205]] ++// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT164_EXITSTUB:%.*]], !dbg [[DBG221]] +// CHECK-DEBUG: omp_loop.body147: -+// CHECK-DEBUG-NEXT: [[TMP23:%.*]] = add i32 [[OMP_LOOP_IV151]], [[TMP20]], !dbg [[DBG219]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.16(ptr [[I139]], i32 [[TMP23]], ptr [[AGG_CAPTURED141]]), !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: [[TMP24:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG221:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV154:%.*]] = sitofp i32 [[TMP24]] to double, !dbg [[DBG221]] -+// CHECK-DEBUG-NEXT: [[TMP25:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG219]] -+// CHECK-DEBUG-NEXT: [[ADD155:%.*]] = fadd double [[CONV154]], [[TMP25]], !dbg [[DBG222:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV156:%.*]] = fptrunc double [[ADD155]] to float, !dbg [[DBG221]] -+// CHECK-DEBUG-NEXT: [[TMP26:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG223:![0-9]+]] -+// CHECK-DEBUG-NEXT: store float [[CONV156]], ptr [[TMP26]], align 4, !dbg [[DBG224:![0-9]+]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC148]], !dbg [[DBG217]] ++// CHECK-DEBUG-NEXT: [[TMP23:%.*]] = add i32 [[OMP_LOOP_IV151]], [[TMP20]], !dbg [[DBG220]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.16(ptr [[I139]], i32 [[TMP23]], ptr [[AGG_CAPTURED141]]), !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: [[TMP24:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG222:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV154:%.*]] = sitofp i32 [[TMP24]] to double, !dbg [[DBG222]] ++// CHECK-DEBUG-NEXT: [[TMP25:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG220]] ++// CHECK-DEBUG-NEXT: [[ADD155:%.*]] = fadd double [[CONV154]], [[TMP25]], !dbg [[DBG223:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV156:%.*]] = fptrunc double [[ADD155]] to float, !dbg [[DBG222]] ++// CHECK-DEBUG-NEXT: [[TMP26:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG224:![0-9]+]] ++// CHECK-DEBUG-NEXT: store float [[CONV156]], ptr [[TMP26]], align 4, !dbg 
[[DBG225:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC148]], !dbg [[DBG218]] +// CHECK-DEBUG: omp_loop.inc148: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT153]] = add nuw i32 [[OMP_LOOP_IV151]], 1, !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER145]], !dbg [[DBG217]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT153]] = add nuw i32 [[OMP_LOOP_IV151]], 1, !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER145]], !dbg [[DBG218]] +// CHECK-DEBUG: omp_loop.body85: -+// CHECK-DEBUG-NEXT: [[TMP27:%.*]] = add i32 [[OMP_LOOP_IV89]], [[TMP13]], !dbg [[DBG207]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.12(ptr [[I77]], i32 [[TMP27]], ptr [[AGG_CAPTURED79]]), !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: [[TMP28:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG225:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV92:%.*]] = sitofp i32 [[TMP28]] to double, !dbg [[DBG225]] -+// CHECK-DEBUG-NEXT: [[TMP29:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG207]] -+// CHECK-DEBUG-NEXT: [[ADD93:%.*]] = fadd double [[CONV92]], [[TMP29]], !dbg [[DBG226:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV94:%.*]] = fptrunc double [[ADD93]] to float, !dbg [[DBG225]] -+// CHECK-DEBUG-NEXT: [[TMP30:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG227:![0-9]+]] -+// CHECK-DEBUG-NEXT: store float [[CONV94]], ptr [[TMP30]], align 4, !dbg [[DBG228:![0-9]+]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC86]], !dbg [[DBG205]] ++// CHECK-DEBUG-NEXT: [[TMP27:%.*]] = add i32 [[OMP_LOOP_IV89]], [[TMP13]], !dbg [[DBG208]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.12(ptr [[I77]], i32 [[TMP27]], ptr [[AGG_CAPTURED79]]), !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: [[TMP28:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG226:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV92:%.*]] = sitofp i32 [[TMP28]] to double, !dbg [[DBG226]] ++// CHECK-DEBUG-NEXT: [[TMP29:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG208]] ++// CHECK-DEBUG-NEXT: [[ADD93:%.*]] = fadd double [[CONV92]], [[TMP29]], !dbg [[DBG227:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV94:%.*]] = fptrunc double [[ADD93]] to float, !dbg [[DBG226]] ++// CHECK-DEBUG-NEXT: [[TMP30:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG228:![0-9]+]] ++// CHECK-DEBUG-NEXT: store float [[CONV94]], ptr [[TMP30]], align 4, !dbg [[DBG229:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC86]], !dbg [[DBG206]] +// CHECK-DEBUG: omp_loop.inc86: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT91]] = add nuw i32 [[OMP_LOOP_IV89]], 1, !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER83]], !dbg [[DBG205]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT91]] = add nuw i32 [[OMP_LOOP_IV89]], 1, !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER83]], !dbg [[DBG206]] // CHECK-DEBUG: omp_loop.body24: --// CHECK-DEBUG-NEXT: [[TMP34:%.*]] = add i32 [[OMP_LOOP_IV28]], [[TMP6]], !dbg [[DBG196]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.8(ptr [[I16]], i32 [[TMP34]], ptr [[AGG_CAPTURED18]]), !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: [[TMP35:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG230:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV31:%.*]] = sitofp i32 [[TMP35]] to double, !dbg [[DBG230]] --// CHECK-DEBUG-NEXT: [[TMP36:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG196]] --// CHECK-DEBUG-NEXT: [[ADD32:%.*]] = fadd double [[CONV31]], [[TMP36]], !dbg [[DBG231:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV33:%.*]] = fptrunc double [[ADD32]] to float, !dbg [[DBG230]] --// CHECK-DEBUG-NEXT: 
[[TMP37:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG232:![0-9]+]] --// CHECK-DEBUG-NEXT: store float [[CONV33]], ptr [[TMP37]], align 4, !dbg [[DBG233:![0-9]+]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC25]], !dbg [[DBG194]] -+// CHECK-DEBUG-NEXT: [[TMP31:%.*]] = add i32 [[OMP_LOOP_IV28]], [[TMP6]], !dbg [[DBG195]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.8(ptr [[I16]], i32 [[TMP31]], ptr [[AGG_CAPTURED18]]), !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: [[TMP32:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG229:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV31:%.*]] = sitofp i32 [[TMP32]] to double, !dbg [[DBG229]] -+// CHECK-DEBUG-NEXT: [[TMP33:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG195]] -+// CHECK-DEBUG-NEXT: [[ADD32:%.*]] = fadd double [[CONV31]], [[TMP33]], !dbg [[DBG230:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV33:%.*]] = fptrunc double [[ADD32]] to float, !dbg [[DBG229]] -+// CHECK-DEBUG-NEXT: [[TMP34:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG231:![0-9]+]] -+// CHECK-DEBUG-NEXT: store float [[CONV33]], ptr [[TMP34]], align 4, !dbg [[DBG232:![0-9]+]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC25]], !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: [[TMP34:%.*]] = add i32 [[OMP_LOOP_IV28]], [[TMP6]], !dbg [[DBG195]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.8(ptr [[I16]], i32 [[TMP34]], ptr [[AGG_CAPTURED18]]), !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: [[TMP35:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG229:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV31:%.*]] = sitofp i32 [[TMP35]] to double, !dbg [[DBG229]] +-// CHECK-DEBUG-NEXT: [[TMP36:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG195]] +-// CHECK-DEBUG-NEXT: [[ADD32:%.*]] = fadd double [[CONV31]], [[TMP36]], !dbg [[DBG230:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV33:%.*]] = fptrunc double [[ADD32]] to float, !dbg [[DBG229]] +-// CHECK-DEBUG-NEXT: [[TMP37:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG231:![0-9]+]] +-// CHECK-DEBUG-NEXT: store float [[CONV33]], ptr [[TMP37]], align 4, !dbg [[DBG232:![0-9]+]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC25]], !dbg [[DBG193]] ++// CHECK-DEBUG-NEXT: [[TMP31:%.*]] = add i32 [[OMP_LOOP_IV28]], [[TMP6]], !dbg [[DBG196]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.8(ptr [[I16]], i32 [[TMP31]], ptr [[AGG_CAPTURED18]]), !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: [[TMP32:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG230:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV31:%.*]] = sitofp i32 [[TMP32]] to double, !dbg [[DBG230]] ++// CHECK-DEBUG-NEXT: [[TMP33:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG196]] ++// CHECK-DEBUG-NEXT: [[ADD32:%.*]] = fadd double [[CONV31]], [[TMP33]], !dbg [[DBG231:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV33:%.*]] = fptrunc double [[ADD32]] to float, !dbg [[DBG230]] ++// CHECK-DEBUG-NEXT: [[TMP34:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG232:![0-9]+]] ++// CHECK-DEBUG-NEXT: store float [[CONV33]], ptr [[TMP34]], align 4, !dbg [[DBG233:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC25]], !dbg [[DBG194]] // CHECK-DEBUG: omp_loop.inc25: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT30]] = add nuw i32 [[OMP_LOOP_IV28]], 1, !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER22]], !dbg [[DBG194]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT30]] = add nuw i32 [[OMP_LOOP_IV28]], 1, !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER22]], !dbg [[DBG193]] -// CHECK-DEBUG: 
omp.par.outlined.exit159.exitStub: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT30]] = add nuw i32 [[OMP_LOOP_IV28]], 1, !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER22]], !dbg [[DBG193]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT30]] = add nuw i32 [[OMP_LOOP_IV28]], 1, !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER22]], !dbg [[DBG194]] +// CHECK-DEBUG: omp.par.outlined.exit164.exitStub: // CHECK-DEBUG-NEXT: ret void // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_2Pfid..omp_par.21 --// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR100:%.*]], ptr noalias [[ZERO_ADDR101:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG234:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR100:%.*]], ptr noalias [[ZERO_ADDR101:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG233:![0-9]+]] { -// CHECK-DEBUG-NEXT: omp.par.entry102: -+// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR103:%.*]], ptr noalias [[ZERO_ADDR104:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG233:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR103:%.*]], ptr noalias [[ZERO_ADDR104:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG234:![0-9]+]] { +// CHECK-DEBUG-NEXT: omp.par.entry105: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 // CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 @@ -2348,6 +2396,10 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 +-// CHECK-DEBUG-NEXT: [[P_LASTITER128:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_LOWERBOUND129:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_UPPERBOUND130:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_STRIDE131:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL106:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR100]], align 4 -// CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL106]], align 4 @@ -2356,66 +2408,66 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED111:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED112:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 -// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR113:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_LASTITER128:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_LOWERBOUND129:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_UPPERBOUND130:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_STRIDE131:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION103:%.*]] -// CHECK-DEBUG: omp.par.region103: --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I110]], metadata [[META235:![0-9]+]], metadata !DIExpression()), !dbg [[DBG241:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 0, ptr [[I110]], align 4, !dbg [[DBG241]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[AGG_CAPTURED111]], i32 0, i32 0, !dbg [[DBG242:![0-9]+]] --// CHECK-DEBUG-NEXT: store ptr [[I110]], ptr [[TMP2]], align 8, !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[AGG_CAPTURED112]], i32 0, i32 0, !dbg [[DBG242]] --// 
CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I110]], align 4, !dbg [[DBG243:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.13(ptr [[DOTCOUNT_ADDR113]], ptr [[AGG_CAPTURED111]]), !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: [[DOTCOUNT114:%.*]] = load i32, ptr [[DOTCOUNT_ADDR113]], align 4, !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER115:%.*]], !dbg [[DBG242]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I110]], [[META234:![0-9]+]], !DIExpression(), [[META240:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[I110]], align 4, !dbg [[META240]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_11]], ptr [[AGG_CAPTURED111]], i32 0, i32 0, !dbg [[DBG241:![0-9]+]] +-// CHECK-DEBUG-NEXT: store ptr [[I110]], ptr [[TMP2]], align 8, !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_12]], ptr [[AGG_CAPTURED112]], i32 0, i32 0, !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I110]], align 4, !dbg [[DBG242:![0-9]+]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.13(ptr [[DOTCOUNT_ADDR113]], ptr [[AGG_CAPTURED111]]), !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: [[DOTCOUNT114:%.*]] = load i32, ptr [[DOTCOUNT_ADDR113]], align 4, !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER115:%.*]], !dbg [[DBG241]] -// CHECK-DEBUG: omp_loop.preheader115: --// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND129]], align 4, !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT114]], 1, !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND130]], align 4, !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE131]], align 4, !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM132:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB33:[0-9]+]]), !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB33]], i32 [[OMP_GLOBAL_THREAD_NUM132]], i32 34, ptr [[P_LASTITER128]], ptr [[P_LOWERBOUND129]], ptr [[P_UPPERBOUND130]], ptr [[P_STRIDE131]], i32 1, i32 0), !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND129]], align 4, !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND130]], align 4, !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER116:%.*]], !dbg [[DBG242]] +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND129]], align 4, !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT114]], 1, !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND130]], align 4, !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE131]], align 4, !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM132:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB33:[0-9]+]]), !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB33]], i32 [[OMP_GLOBAL_THREAD_NUM132]], i32 34, ptr [[P_LASTITER128]], ptr [[P_LOWERBOUND129]], ptr [[P_UPPERBOUND130]], ptr [[P_STRIDE131]], i32 1, i32 0), !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND129]], align 4, !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND130]], align 4, !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER116:%.*]], !dbg [[DBG241]] -// CHECK-DEBUG: omp_loop.header116: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV122:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER115]] ], [ [[OMP_LOOP_NEXT124:%.*]], [[OMP_LOOP_INC119:%.*]] ], !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND117:%.*]], !dbg [[DBG242]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV122:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER115]] ], [ [[OMP_LOOP_NEXT124:%.*]], [[OMP_LOOP_INC119:%.*]] ], !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND117:%.*]], !dbg [[DBG241]] -// CHECK-DEBUG: omp_loop.cond117: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP123:%.*]] = icmp ult i32 [[OMP_LOOP_IV122]], [[TMP9]], !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP123]], label [[OMP_LOOP_BODY118:%.*]], label [[OMP_LOOP_EXIT120:%.*]], !dbg [[DBG242]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP123:%.*]] = icmp ult i32 [[OMP_LOOP_IV122]], [[TMP9]], !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP123]], label [[OMP_LOOP_BODY118:%.*]], label [[OMP_LOOP_EXIT120:%.*]], !dbg [[DBG241]] -// CHECK-DEBUG: omp_loop.exit120: --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB33]], i32 [[OMP_GLOBAL_THREAD_NUM132]]), !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM133:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB33]]), !dbg [[DBG244:![0-9]+]] --// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB34:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM133]]), !dbg [[DBG244]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER121:%.*]], !dbg [[DBG242]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB33]], i32 [[OMP_GLOBAL_THREAD_NUM132]]), !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM133:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB33]]), !dbg [[DBG243:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB34:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM133]]), !dbg [[DBG243]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER121:%.*]], !dbg [[DBG241]] -// CHECK-DEBUG: omp_loop.after121: --// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION103_PARALLEL_AFTER:%.*]], !dbg [[DBG245:![0-9]+]] +-// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION103_PARALLEL_AFTER:%.*]], !dbg [[DBG244:![0-9]+]] -// CHECK-DEBUG: omp.par.region103.parallel.after: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE104:%.*]] -// CHECK-DEBUG: omp.par.pre_finalize104: --// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT134_EXITSTUB:%.*]], !dbg [[DBG245]] +-// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT134_EXITSTUB:%.*]], !dbg [[DBG244]] -// CHECK-DEBUG: omp_loop.body118: --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV122]], [[TMP6]], !dbg [[DBG244]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.14(ptr [[I110]], i32 [[TMP10]], ptr [[AGG_CAPTURED112]]), !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG246:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV125:%.*]] = sitofp i32 [[TMP11]] to double, !dbg [[DBG246]] --// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG244]] --// CHECK-DEBUG-NEXT: [[ADD126:%.*]] = fadd double [[CONV125]], [[TMP12]], !dbg [[DBG247:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV127:%.*]] = fptrunc double 
[[ADD126]] to float, !dbg [[DBG246]] --// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG248:![0-9]+]] --// CHECK-DEBUG-NEXT: store float [[CONV127]], ptr [[TMP13]], align 4, !dbg [[DBG249:![0-9]+]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC119]], !dbg [[DBG242]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV122]], [[TMP6]], !dbg [[DBG243]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.14(ptr [[I110]], i32 [[TMP10]], ptr [[AGG_CAPTURED112]]), !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG245:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV125:%.*]] = sitofp i32 [[TMP11]] to double, !dbg [[DBG245]] +-// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG243]] +-// CHECK-DEBUG-NEXT: [[ADD126:%.*]] = fadd double [[CONV125]], [[TMP12]], !dbg [[DBG246:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV127:%.*]] = fptrunc double [[ADD126]] to float, !dbg [[DBG245]] +-// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG247:![0-9]+]] +-// CHECK-DEBUG-NEXT: store float [[CONV127]], ptr [[TMP13]], align 4, !dbg [[DBG248:![0-9]+]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC119]], !dbg [[DBG241]] -// CHECK-DEBUG: omp_loop.inc119: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT124]] = add nuw i32 [[OMP_LOOP_IV122]], 1, !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER116]], !dbg [[DBG242]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT124]] = add nuw i32 [[OMP_LOOP_IV122]], 1, !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER116]], !dbg [[DBG241]] -// CHECK-DEBUG: omp.par.outlined.exit134.exitStub: ++// CHECK-DEBUG-NEXT: [[P_LASTITER131:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_LOWERBOUND132:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_UPPERBOUND133:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_STRIDE134:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL109:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR103]], align 4 +// CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL109]], align 4 @@ -2424,73 +2476,69 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED114:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED115:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 +// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR116:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_LASTITER131:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_LOWERBOUND132:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_UPPERBOUND133:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_STRIDE134:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION106:%.*]] +// CHECK-DEBUG: omp.par.region106: -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I113]], metadata [[META234:![0-9]+]], metadata !DIExpression()), !dbg [[DBG240:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[I113]], align 4, !dbg [[DBG240]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[AGG_CAPTURED114]], i32 0, i32 0, !dbg [[DBG241:![0-9]+]] -+// CHECK-DEBUG-NEXT: store ptr [[I113]], ptr [[TMP2]], align 8, !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[AGG_CAPTURED115]], i32 0, i32 0, !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, 
ptr [[I113]], align 4, !dbg [[DBG242:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.13(ptr [[DOTCOUNT_ADDR116]], ptr [[AGG_CAPTURED114]]), !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: [[DOTCOUNT117:%.*]] = load i32, ptr [[DOTCOUNT_ADDR116]], align 4, !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER118:%.*]], !dbg [[DBG241]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I113]], [[META235:![0-9]+]], !DIExpression(), [[META241:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[I113]], align 4, !dbg [[META241]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_11]], ptr [[AGG_CAPTURED114]], i32 0, i32 0, !dbg [[DBG242:![0-9]+]] ++// CHECK-DEBUG-NEXT: store ptr [[I113]], ptr [[TMP2]], align 8, !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_12]], ptr [[AGG_CAPTURED115]], i32 0, i32 0, !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I113]], align 4, !dbg [[DBG243:![0-9]+]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.13(ptr [[DOTCOUNT_ADDR116]], ptr [[AGG_CAPTURED114]]), !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: [[DOTCOUNT117:%.*]] = load i32, ptr [[DOTCOUNT_ADDR116]], align 4, !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER118:%.*]], !dbg [[DBG242]] +// CHECK-DEBUG: omp_loop.preheader118: -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND132]], align 4, !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT117]], 1, !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND133]], align 4, !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE134]], align 4, !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM135:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB33:[0-9]+]]), !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB33]], i32 [[OMP_GLOBAL_THREAD_NUM135]], i32 34, ptr [[P_LASTITER131]], ptr [[P_LOWERBOUND132]], ptr [[P_UPPERBOUND133]], ptr [[P_STRIDE134]], i32 1, i32 0), !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND132]], align 4, !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND133]], align 4, !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS1136:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[TRIP_COUNT_MINUS1136]], 1, !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER119:%.*]], !dbg [[DBG241]] ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND132]], align 4, !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT117]], 1, !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND133]], align 4, !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE134]], align 4, !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM135:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB33:[0-9]+]]), !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB33]], i32 [[OMP_GLOBAL_THREAD_NUM135]], i32 34, ptr [[P_LASTITER131]], ptr [[P_LOWERBOUND132]], ptr [[P_UPPERBOUND133]], ptr [[P_STRIDE134]], i32 1, i32 0), !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND132]], align 4, !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[P_UPPERBOUND133]], align 4, !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS1136:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[TRIP_COUNT_MINUS1136]], 1, !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER119:%.*]], !dbg [[DBG242]] +// CHECK-DEBUG: omp_loop.header119: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV125:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER118]] ], [ [[OMP_LOOP_NEXT127:%.*]], [[OMP_LOOP_INC122:%.*]] ], !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND120:%.*]], !dbg [[DBG241]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV125:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER118]] ], [ [[OMP_LOOP_NEXT127:%.*]], [[OMP_LOOP_INC122:%.*]] ], !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND120:%.*]], !dbg [[DBG242]] +// CHECK-DEBUG: omp_loop.cond120: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP126:%.*]] = icmp ult i32 [[OMP_LOOP_IV125]], [[TMP8]], !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP126]], label [[OMP_LOOP_BODY121:%.*]], label [[OMP_LOOP_EXIT123:%.*]], !dbg [[DBG241]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP126:%.*]] = icmp ult i32 [[OMP_LOOP_IV125]], [[TMP8]], !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP126]], label [[OMP_LOOP_BODY121:%.*]], label [[OMP_LOOP_EXIT123:%.*]], !dbg [[DBG242]] +// CHECK-DEBUG: omp_loop.exit123: -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB33]], i32 [[OMP_GLOBAL_THREAD_NUM135]]), !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM137:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB33]]), !dbg [[DBG243:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB34:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM137]]), !dbg [[DBG243]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER124:%.*]], !dbg [[DBG241]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB33]], i32 [[OMP_GLOBAL_THREAD_NUM135]]), !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM137:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB33]]), !dbg [[DBG244:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB34:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM137]]), !dbg [[DBG244]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER124:%.*]], !dbg [[DBG242]] +// CHECK-DEBUG: omp_loop.after124: -+// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION106_PARALLEL_AFTER:%.*]], !dbg [[DBG244:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION106_PARALLEL_AFTER:%.*]], !dbg [[DBG245:![0-9]+]] +// CHECK-DEBUG: omp.par.region106.parallel.after: +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE107:%.*]] +// CHECK-DEBUG: omp.par.pre_finalize107: -+// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT138_EXITSTUB:%.*]], !dbg [[DBG244]] ++// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT138_EXITSTUB:%.*]], !dbg [[DBG245]] +// CHECK-DEBUG: omp_loop.body121: -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[OMP_LOOP_IV125]], [[TMP6]], !dbg [[DBG243]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.14(ptr [[I113]], i32 [[TMP9]], ptr [[AGG_CAPTURED115]]), !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG245:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV128:%.*]] = sitofp i32 [[TMP10]] to double, !dbg [[DBG245]] -+// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG243]] -+// CHECK-DEBUG-NEXT: [[ADD129:%.*]] = fadd double [[CONV128]], [[TMP11]], !dbg [[DBG246:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV130:%.*]] = 
fptrunc double [[ADD129]] to float, !dbg [[DBG245]] -+// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG247:![0-9]+]] -+// CHECK-DEBUG-NEXT: store float [[CONV130]], ptr [[TMP12]], align 4, !dbg [[DBG248:![0-9]+]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC122]], !dbg [[DBG241]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[OMP_LOOP_IV125]], [[TMP6]], !dbg [[DBG244]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.14(ptr [[I113]], i32 [[TMP9]], ptr [[AGG_CAPTURED115]]), !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG246:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV128:%.*]] = sitofp i32 [[TMP10]] to double, !dbg [[DBG246]] ++// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG244]] ++// CHECK-DEBUG-NEXT: [[ADD129:%.*]] = fadd double [[CONV128]], [[TMP11]], !dbg [[DBG247:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV130:%.*]] = fptrunc double [[ADD129]] to float, !dbg [[DBG246]] ++// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG248:![0-9]+]] ++// CHECK-DEBUG-NEXT: store float [[CONV130]], ptr [[TMP12]], align 4, !dbg [[DBG249:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC122]], !dbg [[DBG242]] +// CHECK-DEBUG: omp_loop.inc122: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT127]] = add nuw i32 [[OMP_LOOP_IV125]], 1, !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER119]], !dbg [[DBG241]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT127]] = add nuw i32 [[OMP_LOOP_IV125]], 1, !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER119]], !dbg [[DBG242]] +// CHECK-DEBUG: omp.par.outlined.exit138.exitStub: // CHECK-DEBUG-NEXT: ret void // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_2Pfid..omp_par --// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR41:%.*]], ptr noalias [[ZERO_ADDR42:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG250:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR41:%.*]], ptr noalias [[ZERO_ADDR42:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG249:![0-9]+]] { -// CHECK-DEBUG-NEXT: omp.par.entry43: -+// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR42:%.*]], ptr noalias [[ZERO_ADDR43:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG249:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR42:%.*]], ptr noalias [[ZERO_ADDR43:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG250:![0-9]+]] { +// CHECK-DEBUG-NEXT: omp.par.entry44: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 // CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 @@ -2498,6 +2546,10 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 +-// CHECK-DEBUG-NEXT: [[P_LASTITER69:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_LOWERBOUND70:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_UPPERBOUND71:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_STRIDE72:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL47:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR41]], align 4 -// CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL47]], align 4 @@ -2506,65 +2558,65 @@ 
diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED52:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED53:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 -// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR54:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_LASTITER69:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_LOWERBOUND70:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_UPPERBOUND71:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_STRIDE72:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION44:%.*]] -// CHECK-DEBUG: omp.par.region44: --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I51]], metadata [[META251:![0-9]+]], metadata !DIExpression()), !dbg [[DBG257:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 0, ptr [[I51]], align 4, !dbg [[DBG257]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[AGG_CAPTURED52]], i32 0, i32 0, !dbg [[DBG258:![0-9]+]] --// CHECK-DEBUG-NEXT: store ptr [[I51]], ptr [[TMP2]], align 8, !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED53]], i32 0, i32 0, !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I51]], align 4, !dbg [[DBG259:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.9(ptr [[DOTCOUNT_ADDR54]], ptr [[AGG_CAPTURED52]]), !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: [[DOTCOUNT55:%.*]] = load i32, ptr [[DOTCOUNT_ADDR54]], align 4, !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER56:%.*]], !dbg [[DBG258]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I51]], [[META250:![0-9]+]], !DIExpression(), [[META256:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[I51]], align 4, !dbg [[META256]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_7]], ptr [[AGG_CAPTURED52]], i32 0, i32 0, !dbg [[DBG257:![0-9]+]] +-// CHECK-DEBUG-NEXT: store ptr [[I51]], ptr [[TMP2]], align 8, !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED53]], i32 0, i32 0, !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I51]], align 4, !dbg [[DBG258:![0-9]+]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.9(ptr [[DOTCOUNT_ADDR54]], ptr [[AGG_CAPTURED52]]), !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: [[DOTCOUNT55:%.*]] = load i32, ptr [[DOTCOUNT_ADDR54]], align 4, !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER56:%.*]], !dbg [[DBG257]] -// CHECK-DEBUG: omp_loop.preheader56: --// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND70]], align 4, !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT55]], 1, !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND71]], align 4, !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE72]], align 4, !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM73:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB25:[0-9]+]]), !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB25]], i32 [[OMP_GLOBAL_THREAD_NUM73]], i32 34, ptr [[P_LASTITER69]], ptr [[P_LOWERBOUND70]], ptr [[P_UPPERBOUND71]], ptr [[P_STRIDE72]], i32 1, i32 0), !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load 
i32, ptr [[P_LOWERBOUND70]], align 4, !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND71]], align 4, !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER57:%.*]], !dbg [[DBG258]] +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND70]], align 4, !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT55]], 1, !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND71]], align 4, !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE72]], align 4, !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM73:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB25:[0-9]+]]), !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB25]], i32 [[OMP_GLOBAL_THREAD_NUM73]], i32 34, ptr [[P_LASTITER69]], ptr [[P_LOWERBOUND70]], ptr [[P_UPPERBOUND71]], ptr [[P_STRIDE72]], i32 1, i32 0), !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND70]], align 4, !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND71]], align 4, !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER57:%.*]], !dbg [[DBG257]] -// CHECK-DEBUG: omp_loop.header57: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV63:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER56]] ], [ [[OMP_LOOP_NEXT65:%.*]], [[OMP_LOOP_INC60:%.*]] ], !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND58:%.*]], !dbg [[DBG258]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV63:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER56]] ], [ [[OMP_LOOP_NEXT65:%.*]], [[OMP_LOOP_INC60:%.*]] ], !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND58:%.*]], !dbg [[DBG257]] -// CHECK-DEBUG: omp_loop.cond58: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP64:%.*]] = icmp ult i32 [[OMP_LOOP_IV63]], [[TMP9]], !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP64]], label [[OMP_LOOP_BODY59:%.*]], label [[OMP_LOOP_EXIT61:%.*]], !dbg [[DBG258]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP64:%.*]] = icmp ult i32 [[OMP_LOOP_IV63]], [[TMP9]], !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP64]], label [[OMP_LOOP_BODY59:%.*]], label [[OMP_LOOP_EXIT61:%.*]], !dbg [[DBG257]] -// CHECK-DEBUG: omp_loop.exit61: --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB25]], i32 [[OMP_GLOBAL_THREAD_NUM73]]), !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM74:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB25]]), !dbg [[DBG260:![0-9]+]] --// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB26:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM74]]), !dbg [[DBG260]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER62:%.*]], !dbg [[DBG258]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB25]], i32 [[OMP_GLOBAL_THREAD_NUM73]]), !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM74:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB25]]), !dbg [[DBG259:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB26:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM74]]), !dbg [[DBG259]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER62:%.*]], !dbg [[DBG257]] -// CHECK-DEBUG: omp_loop.after62: --// CHECK-DEBUG-NEXT: br label 
[[OMP_PAR_REGION44_PARALLEL_AFTER:%.*]], !dbg [[DBG261:![0-9]+]] +-// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION44_PARALLEL_AFTER:%.*]], !dbg [[DBG260:![0-9]+]] -// CHECK-DEBUG: omp.par.region44.parallel.after: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE45:%.*]] -// CHECK-DEBUG: omp.par.pre_finalize45: --// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG261]] +-// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG260]] -// CHECK-DEBUG: omp_loop.body59: --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV63]], [[TMP6]], !dbg [[DBG260]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.10(ptr [[I51]], i32 [[TMP10]], ptr [[AGG_CAPTURED53]]), !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG262:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV66:%.*]] = sitofp i32 [[TMP11]] to double, !dbg [[DBG262]] --// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG260]] --// CHECK-DEBUG-NEXT: [[ADD67:%.*]] = fadd double [[CONV66]], [[TMP12]], !dbg [[DBG263:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV68:%.*]] = fptrunc double [[ADD67]] to float, !dbg [[DBG262]] --// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG264:![0-9]+]] --// CHECK-DEBUG-NEXT: store float [[CONV68]], ptr [[TMP13]], align 4, !dbg [[DBG265:![0-9]+]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC60]], !dbg [[DBG258]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV63]], [[TMP6]], !dbg [[DBG259]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.10(ptr [[I51]], i32 [[TMP10]], ptr [[AGG_CAPTURED53]]), !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG261:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV66:%.*]] = sitofp i32 [[TMP11]] to double, !dbg [[DBG261]] +-// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG259]] +-// CHECK-DEBUG-NEXT: [[ADD67:%.*]] = fadd double [[CONV66]], [[TMP12]], !dbg [[DBG262:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV68:%.*]] = fptrunc double [[ADD67]] to float, !dbg [[DBG261]] +-// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG263:![0-9]+]] +-// CHECK-DEBUG-NEXT: store float [[CONV68]], ptr [[TMP13]], align 4, !dbg [[DBG264:![0-9]+]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC60]], !dbg [[DBG257]] -// CHECK-DEBUG: omp_loop.inc60: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT65]] = add nuw i32 [[OMP_LOOP_IV63]], 1, !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER57]], !dbg [[DBG258]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT65]] = add nuw i32 [[OMP_LOOP_IV63]], 1, !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER57]], !dbg [[DBG257]] ++// CHECK-DEBUG-NEXT: [[P_LASTITER70:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_LOWERBOUND71:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_UPPERBOUND72:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_STRIDE73:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL48:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR42]], align 4 +// CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL48]], align 4 @@ -2573,72 +2625,68 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED53:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED54:%.*]] = alloca 
[[STRUCT_ANON_8:%.*]], align 4 +// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR55:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_LASTITER70:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_LOWERBOUND71:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_UPPERBOUND72:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_STRIDE73:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION45:%.*]] +// CHECK-DEBUG: omp.par.region45: -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I52]], metadata [[META250:![0-9]+]], metadata !DIExpression()), !dbg [[DBG256:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[I52]], align 4, !dbg [[DBG256]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[AGG_CAPTURED53]], i32 0, i32 0, !dbg [[DBG257:![0-9]+]] -+// CHECK-DEBUG-NEXT: store ptr [[I52]], ptr [[TMP2]], align 8, !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED54]], i32 0, i32 0, !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I52]], align 4, !dbg [[DBG258:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.9(ptr [[DOTCOUNT_ADDR55]], ptr [[AGG_CAPTURED53]]), !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: [[DOTCOUNT56:%.*]] = load i32, ptr [[DOTCOUNT_ADDR55]], align 4, !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER57:%.*]], !dbg [[DBG257]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I52]], [[META251:![0-9]+]], !DIExpression(), [[META257:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[I52]], align 4, !dbg [[META257]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_7]], ptr [[AGG_CAPTURED53]], i32 0, i32 0, !dbg [[DBG258:![0-9]+]] ++// CHECK-DEBUG-NEXT: store ptr [[I52]], ptr [[TMP2]], align 8, !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED54]], i32 0, i32 0, !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I52]], align 4, !dbg [[DBG259:![0-9]+]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.9(ptr [[DOTCOUNT_ADDR55]], ptr [[AGG_CAPTURED53]]), !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: [[DOTCOUNT56:%.*]] = load i32, ptr [[DOTCOUNT_ADDR55]], align 4, !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER57:%.*]], !dbg [[DBG258]] +// CHECK-DEBUG: omp_loop.preheader57: -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND71]], align 4, !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT56]], 1, !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND72]], align 4, !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE73]], align 4, !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM74:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB25:[0-9]+]]), !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB25]], i32 [[OMP_GLOBAL_THREAD_NUM74]], i32 34, ptr [[P_LASTITER70]], ptr [[P_LOWERBOUND71]], ptr [[P_UPPERBOUND72]], ptr [[P_STRIDE73]], i32 1, i32 0), !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND71]], align 4, !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND72]], align 4, !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS175:%.*]] = sub i32 
[[TMP7]], [[TMP6]], !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[TRIP_COUNT_MINUS175]], 1, !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER58:%.*]], !dbg [[DBG257]] ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND71]], align 4, !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT56]], 1, !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND72]], align 4, !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE73]], align 4, !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM74:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB25:[0-9]+]]), !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB25]], i32 [[OMP_GLOBAL_THREAD_NUM74]], i32 34, ptr [[P_LASTITER70]], ptr [[P_LOWERBOUND71]], ptr [[P_UPPERBOUND72]], ptr [[P_STRIDE73]], i32 1, i32 0), !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND71]], align 4, !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND72]], align 4, !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS175:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[TRIP_COUNT_MINUS175]], 1, !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER58:%.*]], !dbg [[DBG258]] +// CHECK-DEBUG: omp_loop.header58: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV64:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER57]] ], [ [[OMP_LOOP_NEXT66:%.*]], [[OMP_LOOP_INC61:%.*]] ], !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND59:%.*]], !dbg [[DBG257]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV64:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER57]] ], [ [[OMP_LOOP_NEXT66:%.*]], [[OMP_LOOP_INC61:%.*]] ], !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND59:%.*]], !dbg [[DBG258]] +// CHECK-DEBUG: omp_loop.cond59: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP65:%.*]] = icmp ult i32 [[OMP_LOOP_IV64]], [[TMP8]], !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP65]], label [[OMP_LOOP_BODY60:%.*]], label [[OMP_LOOP_EXIT62:%.*]], !dbg [[DBG257]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP65:%.*]] = icmp ult i32 [[OMP_LOOP_IV64]], [[TMP8]], !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP65]], label [[OMP_LOOP_BODY60:%.*]], label [[OMP_LOOP_EXIT62:%.*]], !dbg [[DBG258]] +// CHECK-DEBUG: omp_loop.exit62: -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB25]], i32 [[OMP_GLOBAL_THREAD_NUM74]]), !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM76:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB25]]), !dbg [[DBG259:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB26:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM76]]), !dbg [[DBG259]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER63:%.*]], !dbg [[DBG257]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB25]], i32 [[OMP_GLOBAL_THREAD_NUM74]]), !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM76:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB25]]), !dbg [[DBG260:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB26:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM76]]), !dbg [[DBG260]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER63:%.*]], !dbg [[DBG258]] +// CHECK-DEBUG: omp_loop.after63: -+// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION45_PARALLEL_AFTER:%.*]], !dbg [[DBG260:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION45_PARALLEL_AFTER:%.*]], !dbg [[DBG261:![0-9]+]] +// CHECK-DEBUG: 
omp.par.region45.parallel.after: +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE46:%.*]] +// CHECK-DEBUG: omp.par.pre_finalize46: -+// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG260]] ++// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG261]] +// CHECK-DEBUG: omp_loop.body60: -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[OMP_LOOP_IV64]], [[TMP6]], !dbg [[DBG259]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.10(ptr [[I52]], i32 [[TMP9]], ptr [[AGG_CAPTURED54]]), !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG261:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV67:%.*]] = sitofp i32 [[TMP10]] to double, !dbg [[DBG261]] -+// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG259]] -+// CHECK-DEBUG-NEXT: [[ADD68:%.*]] = fadd double [[CONV67]], [[TMP11]], !dbg [[DBG262:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV69:%.*]] = fptrunc double [[ADD68]] to float, !dbg [[DBG261]] -+// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG263:![0-9]+]] -+// CHECK-DEBUG-NEXT: store float [[CONV69]], ptr [[TMP12]], align 4, !dbg [[DBG264:![0-9]+]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC61]], !dbg [[DBG257]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[OMP_LOOP_IV64]], [[TMP6]], !dbg [[DBG260]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.10(ptr [[I52]], i32 [[TMP9]], ptr [[AGG_CAPTURED54]]), !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG262:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV67:%.*]] = sitofp i32 [[TMP10]] to double, !dbg [[DBG262]] ++// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG260]] ++// CHECK-DEBUG-NEXT: [[ADD68:%.*]] = fadd double [[CONV67]], [[TMP11]], !dbg [[DBG263:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV69:%.*]] = fptrunc double [[ADD68]] to float, !dbg [[DBG262]] ++// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG264:![0-9]+]] ++// CHECK-DEBUG-NEXT: store float [[CONV69]], ptr [[TMP12]], align 4, !dbg [[DBG265:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC61]], !dbg [[DBG258]] +// CHECK-DEBUG: omp_loop.inc61: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT66]] = add nuw i32 [[OMP_LOOP_IV64]], 1, !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER58]], !dbg [[DBG257]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT66]] = add nuw i32 [[OMP_LOOP_IV64]], 1, !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER58]], !dbg [[DBG258]] // CHECK-DEBUG: omp.par.outlined.exit.exitStub: // CHECK-DEBUG-NEXT: ret void // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.5 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG266:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG265:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG265:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG266:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // 
CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -2646,109 +2694,109 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META267:![0-9]+]], metadata !DIExpression()), !dbg [[DBG268:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META266:![0-9]+]], metadata !DIExpression()), !dbg [[DBG267:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META266:![0-9]+]], !DIExpression(), [[META267:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META267:![0-9]+]], !DIExpression(), [[META268:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META269:![0-9]+]], metadata !DIExpression()), !dbg [[DBG268]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META268:![0-9]+]], metadata !DIExpression()), !dbg [[DBG267]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META268:![0-9]+]], !DIExpression(), [[META267]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META269:![0-9]+]], !DIExpression(), [[META268]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META270:![0-9]+]], metadata !DIExpression()), !dbg [[DBG272:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG273:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG273]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG273]] --// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG272]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META275:![0-9]+]], metadata !DIExpression()), !dbg [[DBG276:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META277:![0-9]+]], metadata !DIExpression()), !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG276]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META269:![0-9]+]], metadata !DIExpression()), !dbg [[DBG271:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG272:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG272]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG272]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, 
!dbg [[DBG271]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META274:![0-9]+]], metadata !DIExpression()), !dbg [[DBG275:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META276:![0-9]+]], metadata !DIExpression()), !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG275]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META269:![0-9]+]], !DIExpression(), [[META271:![0-9]+]]) +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG272:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG272]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG272]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META271]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META274:![0-9]+]], !DIExpression(), [[META275:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META275]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META276:![0-9]+]], !DIExpression(), [[META275]]) +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META275]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META275]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META275]] +-// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META275]] +-// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META275]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META270:![0-9]+]], !DIExpression(), [[META272:![0-9]+]]) ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG273:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG273]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG273]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META272]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META275:![0-9]+]], !DIExpression(), [[META276:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META276]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META277:![0-9]+]], !DIExpression(), [[META276]]) ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META276]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META276]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META276]] ++// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META276]] ++// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META276]] // CHECK-DEBUG: cond.true: --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr 
[[DOTSTART]], align 4, !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG276]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG275]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META275]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META275]] +-// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META275]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META275]] +-// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META275]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META275]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META275]] +-// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META275]] +-// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META275]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META276]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META276]] ++// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META276]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META276]] ++// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META276]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META276]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META276]] ++// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META276]] ++// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META276]] // CHECK-DEBUG: cond.false: --// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG276]] -+// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG275]] +-// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META275]] ++// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META276]] // CHECK-DEBUG: cond.end: --// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG278:![0-9]+]] -+// CHECK-DEBUG-NEXT: 
[[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG277:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META275]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META275]] +-// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META275]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG277:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META276]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META276]] ++// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META276]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG278:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.6 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG280:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG279:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG279:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG280:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META281:![0-9]+]], metadata !DIExpression()), !dbg [[DBG282:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META280:![0-9]+]], metadata !DIExpression()), !dbg [[DBG281:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META280:![0-9]+]], !DIExpression(), [[META281:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META281:![0-9]+]], !DIExpression(), [[META282:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META283:![0-9]+]], metadata !DIExpression()), !dbg [[DBG282]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META282:![0-9]+]], metadata !DIExpression()), !dbg [[DBG281]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META282:![0-9]+]], !DIExpression(), [[META281]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META283:![0-9]+]], !DIExpression(), [[META282]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META284:![0-9]+]], metadata !DIExpression()), !dbg 
[[DBG282]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META283:![0-9]+]], metadata !DIExpression()), !dbg [[DBG281]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META283:![0-9]+]], !DIExpression(), [[META281]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META284:![0-9]+]], !DIExpression(), [[META282]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG285:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG285]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG287:![0-9]+]] --// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG287]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG287]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG287]] --// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG282]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG285]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG284:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG284]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG286:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG286]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG286]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG286]] -+// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG281]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG284]] +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG284:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG284]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG286:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG286]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG286]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG286]] +-// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META281]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG284]] ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG285:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG285]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG287:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG287]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG287]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG287]] ++// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META282]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG285]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.7 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG288:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr 
noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG287:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG287:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG288:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -2756,109 +2804,109 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META289:![0-9]+]], metadata !DIExpression()), !dbg [[DBG290:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META288:![0-9]+]], metadata !DIExpression()), !dbg [[DBG289:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META288:![0-9]+]], !DIExpression(), [[META289:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META289:![0-9]+]], !DIExpression(), [[META290:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META291:![0-9]+]], metadata !DIExpression()), !dbg [[DBG290]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META290:![0-9]+]], metadata !DIExpression()), !dbg [[DBG289]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META290:![0-9]+]], !DIExpression(), [[META289]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META291:![0-9]+]], !DIExpression(), [[META290]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META292:![0-9]+]], metadata !DIExpression()), !dbg [[DBG294:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG295:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG295]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG295]] --// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG294]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META297:![0-9]+]], metadata !DIExpression()), !dbg [[DBG298:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META299:![0-9]+]], metadata !DIExpression()), !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]], !dbg [[DBG298]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META291:![0-9]+]], metadata !DIExpression()), !dbg [[DBG293:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG294:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG294]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG294]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG293]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META296:![0-9]+]], metadata !DIExpression()), !dbg [[DBG297:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META298:![0-9]+]], metadata !DIExpression()), !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG297]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META291:![0-9]+]], !DIExpression(), [[META293:![0-9]+]]) +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG294:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG294]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG294]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META293]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META296:![0-9]+]], !DIExpression(), [[META297:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META297]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META298:![0-9]+]], !DIExpression(), [[META297]]) +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META297]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META297]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META297]] +-// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META297]] +-// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META297]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META292:![0-9]+]], !DIExpression(), [[META294:![0-9]+]]) ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG295:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG295]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG295]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META294]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META297:![0-9]+]], !DIExpression(), [[META298:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META298]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META299:![0-9]+]], !DIExpression(), [[META298]]) ++// CHECK-DEBUG-NEXT: store i32 1, ptr 
[[DOTSTEP]], align 4, !dbg [[META298]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META298]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META298]] ++// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META298]] ++// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META298]] // CHECK-DEBUG: cond.true: --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG298]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG297]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META297]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META297]] +-// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META297]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META297]] +-// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META297]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META297]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META297]] +-// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META297]] +-// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META297]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META298]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META298]] ++// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META298]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META298]] ++// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META298]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META298]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META298]] ++// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META298]] ++// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META298]] // CHECK-DEBUG: cond.false: --// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG298]] -+// 
CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG297]] +-// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META297]] ++// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META298]] // CHECK-DEBUG: cond.end: --// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG300:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG299:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META297]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META297]] +-// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META297]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG299:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META298]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META298]] ++// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META298]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG300:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.8 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG302:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG301:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG301:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG302:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META303:![0-9]+]], metadata !DIExpression()), !dbg [[DBG304:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META302:![0-9]+]], metadata !DIExpression()), !dbg [[DBG303:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META302:![0-9]+]], !DIExpression(), [[META303:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META303:![0-9]+]], !DIExpression(), [[META304:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META305:![0-9]+]], metadata !DIExpression()), !dbg [[DBG304]] -+// 
CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META304:![0-9]+]], metadata !DIExpression()), !dbg [[DBG303]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META304:![0-9]+]], !DIExpression(), [[META303]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META305:![0-9]+]], !DIExpression(), [[META304]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META306:![0-9]+]], metadata !DIExpression()), !dbg [[DBG304]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META305:![0-9]+]], metadata !DIExpression()), !dbg [[DBG303]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META305:![0-9]+]], !DIExpression(), [[META303]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META306:![0-9]+]], !DIExpression(), [[META304]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG307:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG307]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG309:![0-9]+]] --// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG309]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG309]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG309]] --// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG304]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG307]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG306:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG306]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG308:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG308]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG308]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG308]] -+// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG303]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG306]] +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG306:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG306]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG308:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG308]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG308]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG308]] +-// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META303]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG306]] ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG307:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG307]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG309:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 
1, [[TMP3]], !dbg [[DBG309]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG309]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG309]] ++// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META304]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG307]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.9 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG310:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG309:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG309:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG310:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -2866,109 +2914,109 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META311:![0-9]+]], metadata !DIExpression()), !dbg [[DBG312:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META310:![0-9]+]], metadata !DIExpression()), !dbg [[DBG311:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META310:![0-9]+]], !DIExpression(), [[META311:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META311:![0-9]+]], !DIExpression(), [[META312:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META313:![0-9]+]], metadata !DIExpression()), !dbg [[DBG312]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META312:![0-9]+]], metadata !DIExpression()), !dbg [[DBG311]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META312:![0-9]+]], !DIExpression(), [[META311]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META313:![0-9]+]], !DIExpression(), [[META312]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META314:![0-9]+]], metadata !DIExpression()), !dbg [[DBG316:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG317:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG317]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG317]] --// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG316]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META319:![0-9]+]], metadata !DIExpression()), !dbg [[DBG320:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, 
!dbg [[DBG320]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META321:![0-9]+]], metadata !DIExpression()), !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG320]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META313:![0-9]+]], metadata !DIExpression()), !dbg [[DBG315:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG316:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG316]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG316]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG315]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META318:![0-9]+]], metadata !DIExpression()), !dbg [[DBG319:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META320:![0-9]+]], metadata !DIExpression()), !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG319]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META313:![0-9]+]], !DIExpression(), [[META315:![0-9]+]]) +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG316:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG316]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG316]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META315]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META318:![0-9]+]], !DIExpression(), [[META319:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META319]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META320:![0-9]+]], !DIExpression(), [[META319]]) +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META319]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META319]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META319]] +-// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META319]] +-// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META319]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META314:![0-9]+]], !DIExpression(), [[META316:![0-9]+]]) ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG317:![0-9]+]] ++// CHECK-DEBUG-NEXT: 
[[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG317]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG317]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META316]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META319:![0-9]+]], !DIExpression(), [[META320:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META320]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META321:![0-9]+]], !DIExpression(), [[META320]]) ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META320]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META320]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META320]] ++// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META320]] ++// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META320]] // CHECK-DEBUG: cond.true: --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG320]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG319]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META319]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META319]] +-// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META319]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META319]] +-// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META319]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META319]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META319]] +-// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META319]] +-// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META319]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META320]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META320]] ++// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META320]] ++// 
CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META320]] ++// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META320]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META320]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META320]] ++// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META320]] ++// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META320]] // CHECK-DEBUG: cond.false: --// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG320]] -+// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG319]] +-// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META319]] ++// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META320]] // CHECK-DEBUG: cond.end: --// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG322:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG321:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META319]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META319]] +-// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META319]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG321:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META320]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META320]] ++// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META320]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG322:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.10 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG324:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG323:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG323:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG324:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META325:![0-9]+]], metadata !DIExpression()), !dbg [[DBG326:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr 
[[LOOPVAR_ADDR]], metadata [[META324:![0-9]+]], metadata !DIExpression()), !dbg [[DBG325:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META324:![0-9]+]], !DIExpression(), [[META325:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META325:![0-9]+]], !DIExpression(), [[META326:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META327:![0-9]+]], metadata !DIExpression()), !dbg [[DBG326]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META326:![0-9]+]], metadata !DIExpression()), !dbg [[DBG325]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META326:![0-9]+]], !DIExpression(), [[META325]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META327:![0-9]+]], !DIExpression(), [[META326]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META328:![0-9]+]], metadata !DIExpression()), !dbg [[DBG326]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META327:![0-9]+]], metadata !DIExpression()), !dbg [[DBG325]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META327:![0-9]+]], !DIExpression(), [[META325]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META328:![0-9]+]], !DIExpression(), [[META326]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG329:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG329]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG331:![0-9]+]] --// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG331]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG331]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG331]] --// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG326]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG329]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG328:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG328]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG330:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG330]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG330]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG330]] -+// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG325]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG328]] +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG328:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG328]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG330:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG330]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG330]] +-// CHECK-DEBUG-NEXT: 
[[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG330]] +-// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META325]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG328]] ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG329:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG329]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG331:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG331]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG331]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG331]] ++// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META326]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG329]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.11 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG332:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG331:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG331:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG332:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -2976,109 +3024,109 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META333:![0-9]+]], metadata !DIExpression()), !dbg [[DBG334:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META332:![0-9]+]], metadata !DIExpression()), !dbg [[DBG333:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META332:![0-9]+]], !DIExpression(), [[META333:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META333:![0-9]+]], !DIExpression(), [[META334:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META335:![0-9]+]], metadata !DIExpression()), !dbg [[DBG334]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META334:![0-9]+]], metadata !DIExpression()), !dbg [[DBG333]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META334:![0-9]+]], !DIExpression(), [[META333]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META335:![0-9]+]], !DIExpression(), [[META334]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META336:![0-9]+]], metadata !DIExpression()), !dbg [[DBG338:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG339:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG339]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG339]] --// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG338]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META341:![0-9]+]], metadata !DIExpression()), !dbg [[DBG342:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META343:![0-9]+]], metadata !DIExpression()), !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG342]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META335:![0-9]+]], metadata !DIExpression()), !dbg [[DBG337:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG338:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG338]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG338]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG337]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META340:![0-9]+]], metadata !DIExpression()), !dbg [[DBG341:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META342:![0-9]+]], metadata !DIExpression()), !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG341]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META335:![0-9]+]], !DIExpression(), [[META337:![0-9]+]]) +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG338:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG338]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG338]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META337]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META340:![0-9]+]], !DIExpression(), [[META341:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META341]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META342:![0-9]+]], !DIExpression(), [[META341]]) +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META341]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, 
!dbg [[META341]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META341]] +-// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META341]] +-// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META341]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META336:![0-9]+]], !DIExpression(), [[META338:![0-9]+]]) ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG339:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG339]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG339]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META338]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META341:![0-9]+]], !DIExpression(), [[META342:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META342]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META343:![0-9]+]], !DIExpression(), [[META342]]) ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META342]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META342]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META342]] ++// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META342]] ++// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META342]] // CHECK-DEBUG: cond.true: --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG342]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG341]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META341]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META341]] +-// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META341]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META341]] +-// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META341]] +-// CHECK-DEBUG-NEXT: 
[[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META341]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META341]] +-// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META341]] +-// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META341]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META342]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META342]] ++// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META342]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META342]] ++// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META342]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META342]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META342]] ++// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META342]] ++// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META342]] // CHECK-DEBUG: cond.false: --// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG342]] -+// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG341]] +-// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META341]] ++// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META342]] // CHECK-DEBUG: cond.end: --// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG344:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG343:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META341]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META341]] +-// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META341]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG343:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META342]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META342]] ++// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META342]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG344:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.12 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG346:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG345:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG345:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef 
[[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG346:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META347:![0-9]+]], metadata !DIExpression()), !dbg [[DBG348:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META346:![0-9]+]], metadata !DIExpression()), !dbg [[DBG347:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META346:![0-9]+]], !DIExpression(), [[META347:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META347:![0-9]+]], !DIExpression(), [[META348:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META349:![0-9]+]], metadata !DIExpression()), !dbg [[DBG348]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META348:![0-9]+]], metadata !DIExpression()), !dbg [[DBG347]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META348:![0-9]+]], !DIExpression(), [[META347]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META349:![0-9]+]], !DIExpression(), [[META348]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META350:![0-9]+]], metadata !DIExpression()), !dbg [[DBG348]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META349:![0-9]+]], metadata !DIExpression()), !dbg [[DBG347]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META349:![0-9]+]], !DIExpression(), [[META347]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META350:![0-9]+]], !DIExpression(), [[META348]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG351:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG351]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG353:![0-9]+]] --// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG353]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG353]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG353]] --// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG348]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG351]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG350:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG350]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG352:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG352]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG352]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG352]] -+// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, 
!dbg [[DBG347]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG350]] +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG350:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG350]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG352:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG352]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG352]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG352]] +-// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META347]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG350]] ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG351:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG351]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG353:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG353]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG353]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG353]] ++// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META348]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG351]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.13 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG354:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG353:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG353:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG354:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -3086,109 +3134,109 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META355:![0-9]+]], metadata !DIExpression()), !dbg [[DBG356:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META354:![0-9]+]], metadata !DIExpression()), !dbg [[DBG355:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META354:![0-9]+]], !DIExpression(), [[META355:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META355:![0-9]+]], !DIExpression(), [[META356:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META357:![0-9]+]], metadata !DIExpression()), !dbg [[DBG356]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], 
metadata [[META356:![0-9]+]], metadata !DIExpression()), !dbg [[DBG355]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META356:![0-9]+]], !DIExpression(), [[META355]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META357:![0-9]+]], !DIExpression(), [[META356]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META358:![0-9]+]], metadata !DIExpression()), !dbg [[DBG360:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG361:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG361]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG361]] --// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG360]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META363:![0-9]+]], metadata !DIExpression()), !dbg [[DBG364:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META365:![0-9]+]], metadata !DIExpression()), !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG364]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META357:![0-9]+]], metadata !DIExpression()), !dbg [[DBG359:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG360:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG360]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG360]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG359]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META362:![0-9]+]], metadata !DIExpression()), !dbg [[DBG363:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META364:![0-9]+]], metadata !DIExpression()), !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG363]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META357:![0-9]+]], !DIExpression(), [[META359:![0-9]+]]) +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG360:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG360]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr 
[[TMP2]], align 4, !dbg [[DBG360]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META359]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META362:![0-9]+]], !DIExpression(), [[META363:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META363]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META364:![0-9]+]], !DIExpression(), [[META363]]) +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META363]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META363]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META363]] +-// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META363]] +-// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META363]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META358:![0-9]+]], !DIExpression(), [[META360:![0-9]+]]) ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG361:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG361]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG361]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META360]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META363:![0-9]+]], !DIExpression(), [[META364:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META364]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META365:![0-9]+]], !DIExpression(), [[META364]]) ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META364]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META364]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META364]] ++// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META364]] ++// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META364]] // CHECK-DEBUG: cond.true: --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG364]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: [[DIV:%.*]] = 
udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG363]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META363]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META363]] +-// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META363]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META363]] +-// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META363]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META363]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META363]] +-// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META363]] +-// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META363]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META364]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META364]] ++// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META364]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META364]] ++// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META364]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META364]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META364]] ++// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META364]] ++// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META364]] // CHECK-DEBUG: cond.false: --// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG364]] -+// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG363]] +-// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META363]] ++// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META364]] // CHECK-DEBUG: cond.end: --// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG366:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG365:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META363]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META363]] +-// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META363]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG365:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META364]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META364]] ++// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META364]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG366:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.14 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) 
#[[ATTR4]] !dbg [[DBG368:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG367:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG367:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG368:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META369:![0-9]+]], metadata !DIExpression()), !dbg [[DBG370:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META368:![0-9]+]], metadata !DIExpression()), !dbg [[DBG369:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META368:![0-9]+]], !DIExpression(), [[META369:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META369:![0-9]+]], !DIExpression(), [[META370:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META371:![0-9]+]], metadata !DIExpression()), !dbg [[DBG370]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META370:![0-9]+]], metadata !DIExpression()), !dbg [[DBG369]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META370:![0-9]+]], !DIExpression(), [[META369]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META371:![0-9]+]], !DIExpression(), [[META370]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META372:![0-9]+]], metadata !DIExpression()), !dbg [[DBG370]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META371:![0-9]+]], metadata !DIExpression()), !dbg [[DBG369]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META371:![0-9]+]], !DIExpression(), [[META369]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META372:![0-9]+]], !DIExpression(), [[META370]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG373:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG373]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG375:![0-9]+]] --// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG375]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG375]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG375]] --// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG370]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG373]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG372:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG372]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG374:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG374]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG374]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG374]] -+// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG369]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG372]] +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG372:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG372]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG374:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG374]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG374]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG374]] +-// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META369]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG372]] ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG373:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG373]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG375:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG375]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG375]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG375]] ++// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META370]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG373]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.15 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG376:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG375:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG375:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG376:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -3196,109 +3244,109 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META377:![0-9]+]], metadata !DIExpression()), !dbg [[DBG378:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META376:![0-9]+]], metadata !DIExpression()), 
!dbg [[DBG377:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META376:![0-9]+]], !DIExpression(), [[META377:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META377:![0-9]+]], !DIExpression(), [[META378:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META379:![0-9]+]], metadata !DIExpression()), !dbg [[DBG378]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META378:![0-9]+]], metadata !DIExpression()), !dbg [[DBG377]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META378:![0-9]+]], !DIExpression(), [[META377]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META379:![0-9]+]], !DIExpression(), [[META378]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META380:![0-9]+]], metadata !DIExpression()), !dbg [[DBG382:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG383:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG383]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG383]] --// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG382]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META385:![0-9]+]], metadata !DIExpression()), !dbg [[DBG386:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META387:![0-9]+]], metadata !DIExpression()), !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG386]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META379:![0-9]+]], metadata !DIExpression()), !dbg [[DBG381:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG382:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG382]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG382]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG381]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META384:![0-9]+]], metadata !DIExpression()), !dbg [[DBG385:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META386:![0-9]+]], metadata !DIExpression()), !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG385]] -+// 
CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG385]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META379:![0-9]+]], !DIExpression(), [[META381:![0-9]+]]) +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG382:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG382]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG382]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META381]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META384:![0-9]+]], !DIExpression(), [[META385:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META385]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META386:![0-9]+]], !DIExpression(), [[META385]]) +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META385]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META385]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META385]] +-// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META385]] +-// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META385]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META380:![0-9]+]], !DIExpression(), [[META382:![0-9]+]]) ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG383:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG383]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG383]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META382]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META385:![0-9]+]], !DIExpression(), [[META386:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META386]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META387:![0-9]+]], !DIExpression(), [[META386]]) ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META386]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META386]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META386]] ++// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META386]] ++// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META386]] // CHECK-DEBUG: cond.true: --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG386]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTSTOP]], align 4, !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG385]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META385]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META385]] +-// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META385]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META385]] +-// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META385]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META385]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META385]] +-// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META385]] +-// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META385]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META386]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META386]] ++// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META386]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META386]] ++// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META386]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META386]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META386]] ++// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META386]] ++// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META386]] // CHECK-DEBUG: cond.false: --// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG386]] -+// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG385]] +-// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META385]] ++// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META386]] // CHECK-DEBUG: cond.end: --// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG388:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG387:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META385]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META385]] +-// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META385]] +-// CHECK-DEBUG-NEXT: ret void, !dbg 
[[DBG387:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META386]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META386]] ++// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META386]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG388:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.16 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG390:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG389:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG389:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG390:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META391:![0-9]+]], metadata !DIExpression()), !dbg [[DBG392:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META390:![0-9]+]], metadata !DIExpression()), !dbg [[DBG391:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META390:![0-9]+]], !DIExpression(), [[META391:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META391:![0-9]+]], !DIExpression(), [[META392:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META393:![0-9]+]], metadata !DIExpression()), !dbg [[DBG392]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META392:![0-9]+]], metadata !DIExpression()), !dbg [[DBG391]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META392:![0-9]+]], !DIExpression(), [[META391]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META393:![0-9]+]], !DIExpression(), [[META392]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META394:![0-9]+]], metadata !DIExpression()), !dbg [[DBG392]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META393:![0-9]+]], metadata !DIExpression()), !dbg [[DBG391]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META393:![0-9]+]], !DIExpression(), [[META391]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META394:![0-9]+]], !DIExpression(), [[META392]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG395:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr 
[[TMP1]], align 4, !dbg [[DBG395]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG397:![0-9]+]] --// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG397]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG397]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG397]] --// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG392]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG395]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG394:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG394]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG396:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG396]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG396]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG396]] -+// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG391]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG394]] +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG394:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG394]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG396:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG396]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG396]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG396]] +-// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META391]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG394]] ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG395:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG395]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG397:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG397]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG397]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG397]] ++// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META392]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG395]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.17 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG398:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG397:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG397:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG398:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -3306,109 +3354,109 @@ 
diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META399:![0-9]+]], metadata !DIExpression()), !dbg [[DBG400:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META398:![0-9]+]], metadata !DIExpression()), !dbg [[DBG399:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META398:![0-9]+]], !DIExpression(), [[META399:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META399:![0-9]+]], !DIExpression(), [[META400:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META401:![0-9]+]], metadata !DIExpression()), !dbg [[DBG400]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META400:![0-9]+]], metadata !DIExpression()), !dbg [[DBG399]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META400:![0-9]+]], !DIExpression(), [[META399]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META401:![0-9]+]], !DIExpression(), [[META400]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META402:![0-9]+]], metadata !DIExpression()), !dbg [[DBG404:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG405:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG405]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG405]] --// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG404]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META407:![0-9]+]], metadata !DIExpression()), !dbg [[DBG408:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META409:![0-9]+]], metadata !DIExpression()), !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG408]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META401:![0-9]+]], metadata !DIExpression()), !dbg [[DBG403:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG404:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG404]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG404]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG403]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr 
[[DOTSTOP]], metadata [[META406:![0-9]+]], metadata !DIExpression()), !dbg [[DBG407:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META408:![0-9]+]], metadata !DIExpression()), !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG407]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META401:![0-9]+]], !DIExpression(), [[META403:![0-9]+]]) +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG404:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG404]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG404]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META403]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META406:![0-9]+]], !DIExpression(), [[META407:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META407]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META408:![0-9]+]], !DIExpression(), [[META407]]) +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META407]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META407]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META407]] +-// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META407]] +-// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META407]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META402:![0-9]+]], !DIExpression(), [[META404:![0-9]+]]) ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG405:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG405]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG405]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META404]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META407:![0-9]+]], !DIExpression(), [[META408:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META408]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META409:![0-9]+]], !DIExpression(), [[META408]]) ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META408]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META408]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META408]] ++// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META408]] ++// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META408]] // CHECK-DEBUG: cond.true: --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: [[SUB:%.*]] = 
sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG408]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG407]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META407]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META407]] +-// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META407]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META407]] +-// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META407]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META407]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META407]] +-// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META407]] +-// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META407]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META408]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META408]] ++// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META408]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META408]] ++// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META408]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META408]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META408]] ++// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META408]] ++// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META408]] // CHECK-DEBUG: cond.false: --// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG408]] -+// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG407]] +-// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META407]] ++// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META408]] // CHECK-DEBUG: cond.end: --// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG410:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg 
[[DBG407]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG409:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META407]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META407]] +-// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META407]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG409:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META408]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META408]] ++// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META408]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG410:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.18 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG412:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG411:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG411:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG412:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META413:![0-9]+]], metadata !DIExpression()), !dbg [[DBG414:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META412:![0-9]+]], metadata !DIExpression()), !dbg [[DBG413:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META412:![0-9]+]], !DIExpression(), [[META413:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META413:![0-9]+]], !DIExpression(), [[META414:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META415:![0-9]+]], metadata !DIExpression()), !dbg [[DBG414]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META414:![0-9]+]], metadata !DIExpression()), !dbg [[DBG413]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META414:![0-9]+]], !DIExpression(), [[META413]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META415:![0-9]+]], !DIExpression(), [[META414]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META416:![0-9]+]], metadata !DIExpression()), !dbg [[DBG414]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr 
[[__CONTEXT_ADDR]], metadata [[META415:![0-9]+]], metadata !DIExpression()), !dbg [[DBG413]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META415:![0-9]+]], !DIExpression(), [[META413]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META416:![0-9]+]], !DIExpression(), [[META414]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG417:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG417]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG419:![0-9]+]] --// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG419]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG419]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG419]] --// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG414]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG417]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG416:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG416]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG418:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG418]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG418]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG418]] -+// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG413]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG416]] +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG416:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG416]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG418:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG418]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG418]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG418]] +-// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META413]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG416]] ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG417:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG417]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG419:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG419]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG419]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG419]] ++// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META414]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG417]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.19 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG420:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr 
noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG419:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG419:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG420:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -3416,109 +3464,470 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META421:![0-9]+]], metadata !DIExpression()), !dbg [[DBG422:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META420:![0-9]+]], metadata !DIExpression()), !dbg [[DBG421:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META420:![0-9]+]], !DIExpression(), [[META421:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META421:![0-9]+]], !DIExpression(), [[META422:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META423:![0-9]+]], metadata !DIExpression()), !dbg [[DBG422]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META422:![0-9]+]], metadata !DIExpression()), !dbg [[DBG421]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META422:![0-9]+]], !DIExpression(), [[META421]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META423:![0-9]+]], !DIExpression(), [[META422]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META424:![0-9]+]], metadata !DIExpression()), !dbg [[DBG426:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG427:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG427]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG427]] --// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG426]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META429:![0-9]+]], metadata !DIExpression()), !dbg [[DBG430:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META431:![0-9]+]], metadata !DIExpression()), !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG430]] -+// CHECK-DEBUG-NEXT: call void 
@llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META423:![0-9]+]], metadata !DIExpression()), !dbg [[DBG425:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG426:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG426]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG426]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG425]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META428:![0-9]+]], metadata !DIExpression()), !dbg [[DBG429:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META430:![0-9]+]], metadata !DIExpression()), !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG429]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META423:![0-9]+]], !DIExpression(), [[META425:![0-9]+]]) +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG426:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG426]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG426]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META425]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META428:![0-9]+]], !DIExpression(), [[META429:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META429]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META430:![0-9]+]], !DIExpression(), [[META429]]) +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META429]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META429]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META429]] +-// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META429]] +-// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META429]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META424:![0-9]+]], !DIExpression(), [[META426:![0-9]+]]) ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG427:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG427]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG427]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META426]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META429:![0-9]+]], !DIExpression(), [[META430:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META430]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META431:![0-9]+]], !DIExpression(), [[META430]]) ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META430]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] 
= load i32, ptr [[DOTSTART]], align 4, !dbg [[META430]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META430]] ++// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META430]] ++// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META430]] // CHECK-DEBUG: cond.true: --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG430]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG429]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META429]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META429]] +-// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META429]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META429]] +-// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META429]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META429]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META429]] +-// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META429]] +-// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META429]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META430]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META430]] ++// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META430]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META430]] ++// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META430]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META430]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META430]] ++// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META430]] ++// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META430]] // CHECK-DEBUG: cond.false: --// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG430]] -+// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG429]] +-// CHECK-DEBUG-NEXT: br 
label [[COND_END]], !dbg [[META429]] ++// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META430]] // CHECK-DEBUG: cond.end: --// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG432:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG431:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META429]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META429]] +-// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META429]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG431:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META430]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META430]] ++// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META430]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG432:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.20 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG434:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG433:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG433:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG434:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META435:![0-9]+]], metadata !DIExpression()), !dbg [[DBG436:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META434:![0-9]+]], metadata !DIExpression()), !dbg [[DBG435:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META434:![0-9]+]], !DIExpression(), [[META435:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META435:![0-9]+]], !DIExpression(), [[META436:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META437:![0-9]+]], metadata !DIExpression()), !dbg [[DBG436]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata 
[[META436:![0-9]+]], metadata !DIExpression()), !dbg [[DBG435]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META436:![0-9]+]], !DIExpression(), [[META435]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META437:![0-9]+]], !DIExpression(), [[META436]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META438:![0-9]+]], metadata !DIExpression()), !dbg [[DBG436]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META437:![0-9]+]], metadata !DIExpression()), !dbg [[DBG435]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META437:![0-9]+]], !DIExpression(), [[META435]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META438:![0-9]+]], !DIExpression(), [[META436]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG439:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG439]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG441:![0-9]+]] --// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG441]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG441]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG441]] --// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG436]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG439]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG438:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG438]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG440:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG440]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG440]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG440]] -+// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG435]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG438]] +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG438:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG438]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG440:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG440]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG440]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG440]] +-// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META435]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG438]] ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG439:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG439]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG441:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG441]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 
[[TMP2]], [[MUL]], !dbg [[DBG441]]
++// CHECK-DEBUG-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG441]]
++// CHECK-DEBUG-NEXT:    store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META436]]
++// CHECK-DEBUG-NEXT:    ret void, !dbg [[DBG439]]
 //
-diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Frontend/CodeGenOptions.def llvm-project/flang/include/flang/Frontend/CodeGenOptions.def
---- llvm-project.orig/flang/include/flang/Frontend/CodeGenOptions.def	2024-06-12 10:43:12.596210747 -0500
-+++ llvm-project/flang/include/flang/Frontend/CodeGenOptions.def	2024-06-12 10:44:09.347614281 -0500
-@@ -40,5 +40,7 @@
+diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/docs/DoConcurrentConversionToOpenMP.md llvm-project-aso/flang/docs/DoConcurrentConversionToOpenMP.md
+--- llvm-project-aso-orig/flang/docs/DoConcurrentConversionToOpenMP.md	1969-12-31 18:00:00.000000000 -0600
++++ llvm-project-aso/flang/docs/DoConcurrentConversionToOpenMP.md	2024-11-23 20:39:47.172175395 -0600
+@@ -0,0 +1,332 @@
++
++
++# `DO CONCURRENT` mapping to OpenMP
++
++```{contents}
++---
++local:
++---
++```
++
++This document seeks to describe the effort to parallelize `do concurrent` loops
++by mapping them to OpenMP worksharing constructs. The goals of this document
++are:
++* Describing how to instruct `flang-new` to map `DO CONCURRENT` loops to OpenMP
++  constructs.
++* Tracking the current status of such mapping.
++* Describing the limitations of the current implementation.
++* Describing next steps.
++
++## Usage
++
++In order to enable `do concurrent` to OpenMP mapping, `flang-new` adds a new
++compiler flag: `-fdo-concurrent-parallel`. This flag has 3 possible values:
++1. `host`: this maps `do concurrent` loops to run in parallel on the host CPU.
++   This maps such loops to the equivalent of `omp parallel do`.
++2. `device`: this maps `do concurrent` loops to run in parallel on a device
++   (GPU). This maps such loops to the equivalent of `omp target teams
++   distribute parallel do`.
++3. `none`: this disables `do concurrent` mapping altogether. In that case, such
++   loops are emitted as sequential loops.
++
++The above compiler switch is currently available only when OpenMP is also
++enabled. So you need to provide the following options to flang in order to
++enable it:
++```
++flang-new ... -fopenmp -fdo-concurrent-parallel=[host|device|none] ...
++```
++
++## Current status
++
++Under the hood, `do concurrent` mapping is implemented in the
++`DoConcurrentConversionPass`. This is still an experimental pass, which means
++that:
++* It has been tested in a very limited way so far.
++* It has been tested on simple synthetic inputs.
++
++To describe the current status in more detail, the following is a description
++of how the pass currently behaves for single-range loops and then for
++multi-range loops.
++
++### Single-range loops
++
++Given the following loop:
++```fortran
++  do concurrent(i=1:n)
++    a(i) = i * i
++  end do
++```
++
++#### Mapping to `host`
++
++Mapping this loop to the `host` generates MLIR operations of the following
++structure:
++
++```mlir
++%4 = fir.address_of(@_QFEa) ...
++%6:2 = hlfir.declare %4 ...
++
++omp.parallel {
++  // Allocate private copy for `i`.
++  %19 = fir.alloca i32 {bindc_name = "i"}
++  %20:2 = hlfir.declare %19 {uniq_name = "_QFEi"} ...
++
++  omp.wsloop {
++    omp.loop_nest (%arg0) : index = (%21) to (%22) inclusive step (%c1_2) {
++      %23 = fir.convert %arg0 : (index) -> i32
++      // Use the privatized version of `i`.
++      fir.store %23 to %20#1 : !fir.ref
++      ...
++
++      // Use "shared" SSA value of `a`.
++      %42 = hlfir.designate %6#0
++      hlfir.assign %35 to %42
++      ...
++      omp.yield
++    }
++    omp.terminator
++  }
++  omp.terminator
++}
++```
++
++#### Mapping to `device`
++
++Mapping the same loop to the `device` generates MLIR operations of the
++following structure:
++
++```mlir
++// Map `a` to the `target` region.
++%29 = omp.map.info ... {name = "_QFEa"}
++omp.target ... map_entries(..., %29 -> %arg4 ...) {
++  ...
++  %51:2 = hlfir.declare %arg4
++  ...
++  omp.teams {
++    // Allocate private copy for `i`.
++    %52 = fir.alloca i32 {bindc_name = "i"}
++    %53:2 = hlfir.declare %52
++    ...
++
++    omp.distribute {
++      omp.parallel {
++        omp.wsloop {
++          omp.loop_nest (%arg5) : index = (%54) to (%55) inclusive step (%c1_9) {
++            // Use the privatized version of `i`.
++            %56 = fir.convert %arg5 : (index) -> i32
++            fir.store %56 to %53#1
++            ...
++            // Use the mapped version of `a`.
++            ... = hlfir.designate %51#0
++            ...
++          }
++          omp.terminator
++        }
++        omp.terminator
++      }
++      omp.terminator
++    }
++    omp.terminator
++  }
++  omp.terminator
++}
++```
++
++### Multi-range loops
++
++The pass currently supports multi-range loops as well. Given the following
++example:
++
++```fortran
++  do concurrent(i=1:n, j=1:m)
++    a(i,j) = i * j
++  end do
++```
++
++The generated `omp.loop_nest` operation looks like:
++
++```mlir
++omp.loop_nest (%arg0, %arg1)
++    : index = (%17, %19) to (%18, %20)
++    inclusive step (%c1_2, %c1_4) {
++  fir.store %arg0 to %private_i#1 : !fir.ref
++  fir.store %arg1 to %private_j#1 : !fir.ref
++  ...
++  omp.yield
++}
++```
++
++It is worth noting that we have privatized versions for both iteration
++variables: `i` and `j`. These are locally allocated inside the parallel/target
++OpenMP region similar to what the single-range example in the previous section
++shows.
++
++#### Multi-range and perfectly-nested loops
++
++Currently, on the `FIR` dialect level, the following 2 loops are modelled in
++exactly the same way:
++
++```fortran
++do concurrent(i=1:n, j=1:m)
++  a(i,j) = i * j
++end do
++```
++
++```fortran
++do concurrent(i=1:n)
++  do concurrent(j=1:m)
++    a(i,j) = i * j
++  end do
++end do
++```
++
++Both of the above loops are modelled as:
++
++```mlir
++fir.do_loop %arg0 = %11 to %12 step %c1 unordered {
++  ...
++  fir.do_loop %arg1 = %14 to %15 step %c1_1 unordered {
++    ...
++  }
++}
++```
++
++Consequently, from the `DoConcurrentConversionPass`' perspective, both loops
++are treated in the same manner. Under the hood, the pass detects
++perfectly-nested loop nests and maps such nests as if they were multi-range
++loops.
++
++#### Non-perfectly-nested loops
++
++One limitation that the pass currently has is that it treats any intervening
++code in a loop nest as being disruptive to detecting that nest as a single
++unit. For example, given the following input:
++
++```fortran
++do concurrent(i=1:n)
++  x = 41
++  do concurrent(j=1:m)
++    a(i,j) = i * j
++  end do
++end do
++```
++
++Since there is at least one statement between the two loop headers (i.e.
++`x = 41`), the pass does not detect the `i` and `j` loops as a nest. Rather,
++the pass in that case only maps the `i` loop to OpenMP and leaves the `j` loop
++in its original form. In theory, in this example, we can sink the intervening
++code into the `j` loop and detect the complete nest. However, such a
++transformation still needs to be implemented.
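++
++As a purely illustrative sketch (the sinking transformation is not implemented
++yet, and the legality analysis it would require is left aside here), the
++conceptually sunk form of the example above would look as follows:
++
++```fortran
++do concurrent(i=1:n)
++  do concurrent(j=1:m)
++    ! Hypothetical output: the intervening statement has been sunk into the
++    ! `j` loop so that the two loops form a perfect nest again.
++    x = 41
++    a(i,j) = i * j
++  end do
++end do
++```
++
++Until such sinking is implemented, the two loops are handled separately.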
++
++The above also has the consequence that the `j` variable will **not** be
++privatized in the OpenMP parallel/target region. In other words, it will be
++treated as if it were a `shared` variable. For more details about privatization,
++see the "Data environment" section below.
++
++### Data environment
++
++By default, variables that are used inside a `do concurrent` loop nest are
++either treated as `shared` in case of mapping to `host`, or mapped into the
++`target` region using a `map` clause in case of mapping to `device`. The only
++exceptions to this are:
++  1. the loop's iteration variable(s) (IV) of **perfect** loop nests. In that
++     case, for each IV, we allocate a local copy as shown by the mapping
++     examples above.
++  1. any values that result from allocations outside the loop nest and are
++     used exclusively inside of it. In such cases, a local privatized value is
++     created in the OpenMP region to prevent multiple teams of threads from
++     accessing and destroying the same memory block, which causes runtime
++     issues. For an example of such cases, see
++     `flang/test/Transforms/DoConcurrent/locally_destroyed_temp.f90`.
++
++#### Non-perfectly-nested loops' IVs
++
++For non-perfectly-nested loops, the IVs are still treated as `shared` or
++`map` entries as pointed out above. This **might not** be consistent with what
++the Fortran specification tells us. In particular, taking the following
++snippets from the spec (version 2023) into account:
++
++> § 3.35
++> ------
++> construct entity
++> entity whose identifier has the scope of a construct
++
++> § 19.4
++> ------
++> A variable that appears as an index-name in a FORALL or DO CONCURRENT
++> construct, or ... is a construct entity. A variable that has LOCAL or
++> LOCAL_INIT locality in a DO CONCURRENT construct is a construct entity.
++> ...
++> The name of a variable that appears as an index-name in a DO CONCURRENT
++> construct, FORALL statement, or FORALL construct has a scope of the statement
++> or construct. A variable that has LOCAL or LOCAL_INIT locality in a DO
++> CONCURRENT construct has the scope of that construct.
++
++From the above quotes, it seems there is an equivalence between the IV of a `do
++concurrent` loop and a variable with a `LOCAL` locality specifier (equivalent
++to OpenMP's `private` clause). This means that we should probably
++localize/privatize a `do concurrent` loop's IV even if it is not perfectly
++nested in the nest we are parallelizing. For now, however, we **do not** do
++that as pointed out previously. In the near future, we propose a middle-ground
++solution (see the Next steps section for more details).
++
++## Next steps
++
++### Delayed privatization
++
++So far, we emit the privatization logic for IVs inline in the parallel/target
++region. This is enough for our purposes right now since we don't
++localize/privatize any sophisticated types of variables yet. Once we need
++more advanced localization through `do concurrent`'s locality specifiers
++(see below), delayed privatization will enable us to have a much cleaner IR.
++Once the upstream implementation of delayed privatization supports the
++constructs required by the pass, we will move to it rather than inlined/early
++privatization.
++
++### Locality specifiers for `do concurrent`
++
++Locality specifiers will enable the user to control the data environment of the
++loop nest in a more fine-grained way.
Implementing these specifiers on the
++`FIR` dialect level is needed in order to support this in the
++`DoConcurrentConversionPass`.
++
++Such specifiers will also unlock a potential solution to the
++non-perfectly-nested loops' IVs issue described above. In particular, for a
++non-perfectly nested loop, one middle-ground proposal/solution would be to:
++* Emit the loop's IV as shared/mapped just like we do currently.
++* Emit a warning that the IV of the loop is emitted as shared/mapped.
++* Given support for `LOCAL`, we can recommend that the user explicitly
++  localize/privatize the loop's IV if they choose to.
++
++### More advanced detection of loop nests
++
++As pointed out earlier, any intervening code between the headers of 2 nested
++`do concurrent` loops currently prevents us from detecting them as a loop nest.
++In some cases this is overly conservative. Therefore, a more flexible loop-nest
++detection logic needs to be implemented.
++
++### Data-dependence analysis
++
++Right now, we map loop nests without analysing whether such a mapping is safe
++or not. We probably need to at least warn the user about unsafe loop nests due
++to loop-carried dependencies.
++
++### Non-rectangular loop nests
++
++So far, we have not needed to use the pass for non-rectangular loop nests. For
++example:
++```fortran
++do concurrent(i=1:n)
++  do concurrent(j=i:n)
++    ...
++  end do
++end do
++```
++We defer this to the (hopefully) near future when we get the conversion in a
++good shape for the samples/projects at hand.
+diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Decimal/binary-floating-point.h llvm-project-aso/flang/include/flang/Decimal/binary-floating-point.h
+--- llvm-project-aso-orig/flang/include/flang/Decimal/binary-floating-point.h	2024-08-27 20:36:25.176173639 -0500
++++ llvm-project-aso/flang/include/flang/Decimal/binary-floating-point.h	2024-11-23 20:39:47.176175380 -0600
+@@ -32,6 +32,7 @@
+ 
+ template class BinaryFloatingPointNumber {
+ public:
++  RT_OFFLOAD_VAR_GROUP_BEGIN
+   static constexpr common::RealCharacteristics realChars{BINARY_PRECISION};
+   static constexpr int binaryPrecision{BINARY_PRECISION};
+   static constexpr int bits{realChars.bits};
+@@ -47,7 +48,6 @@
+ 
+   using RawType = common::HostUnsignedIntType;
+   static_assert(CHAR_BIT * sizeof(RawType) >= bits);
+-  RT_OFFLOAD_VAR_GROUP_BEGIN
+   static constexpr RawType significandMask{(RawType{1} << significandBits) - 1};
+ 
+   constexpr RT_API_ATTRS BinaryFloatingPointNumber() {} // zero
+diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Frontend/CodeGenOptions.def llvm-project-aso/flang/include/flang/Frontend/CodeGenOptions.def
+--- llvm-project-aso-orig/flang/include/flang/Frontend/CodeGenOptions.def	2024-08-27 20:36:25.176173639 -0500
++++ llvm-project-aso/flang/include/flang/Frontend/CodeGenOptions.def	2024-11-23 20:39:47.176175380 -0600
+@@ -35,10 +35,13 @@
+ CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass
+ 
+ CODEGENOPT(Underscoring, 1, 1)
++CODEGENOPT(OffloadGlobalFiltering, 1, 1)
+ ENUM_CODEGENOPT(RelocationModel, llvm::Reloc::Model, 3, llvm::Reloc::PIC_) ///< Name of the relocation model to use.
+ ENUM_CODEGENOPT(DebugInfo, llvm::codegenoptions::DebugInfoKind, 4, llvm::codegenoptions::NoDebugInfo) ///< Level of debug info to generate ENUM_CODEGENOPT(VecLib, llvm::driver::VectorLibrary, 3, llvm::driver::VectorLibrary::NoLibrary) ///< Vector functions library to use ENUM_CODEGENOPT(FramePointer, llvm::FramePointerKind, 2, llvm::FramePointerKind::None) ///< Enable the usage of frame pointers @@ -3526,32 +3935,32 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Frontend/Cod + #undef CODEGENOPT #undef ENUM_CODEGENOPT -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Frontend/CodeGenOptions.h llvm-project/flang/include/flang/Frontend/CodeGenOptions.h ---- llvm-project.orig/flang/include/flang/Frontend/CodeGenOptions.h 2024-06-12 10:43:12.596210747 -0500 -+++ llvm-project/flang/include/flang/Frontend/CodeGenOptions.h 2024-06-12 10:44:09.347614281 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Frontend/CodeGenOptions.h llvm-project-aso/flang/include/flang/Frontend/CodeGenOptions.h +--- llvm-project-aso-orig/flang/include/flang/Frontend/CodeGenOptions.h 2024-10-18 17:40:32.468992659 -0500 ++++ llvm-project-aso/flang/include/flang/Frontend/CodeGenOptions.h 2024-11-23 20:39:47.176175380 -0600 @@ -15,6 +15,7 @@ #ifndef FORTRAN_FRONTEND_CODEGENOPTIONS_H #define FORTRAN_FRONTEND_CODEGENOPTIONS_H -+#include "flang/Optimizer/Transforms/Utils.h" ++#include "flang/Optimizer/OpenMP/Utils.h" #include "llvm/Frontend/Debug/Options.h" #include "llvm/Frontend/Driver/CodeGenOptions.h" #include "llvm/Support/CodeGen.h" -@@ -129,6 +130,10 @@ - /// transformation. - OptRemark OptimizationRemarkAnalysis; +@@ -143,6 +144,10 @@ + /// (-mlarge-data-threshold). + uint64_t LargeDataThreshold; + /// Optionally map `do concurrent` loops to OpenMP. This is only valid of + /// OpenMP is enabled. -+ using DoConcurrentMappingKind = fir::omp::DoConcurrentMappingKind; ++ using DoConcurrentMappingKind = flangomp::DoConcurrentMappingKind; + // Define accessors/mutators for code generation options of enumeration type. #define CODEGENOPT(Name, Bits, Default) #define ENUM_CODEGENOPT(Name, Type, Bits, Default) \ -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP/Clauses.h llvm-project/flang/include/flang/Lower/OpenMP/Clauses.h ---- llvm-project.orig/flang/include/flang/Lower/OpenMP/Clauses.h 1969-12-31 18:00:00.000000000 -0600 -+++ llvm-project/flang/include/flang/Lower/OpenMP/Clauses.h 2024-06-12 10:44:09.347614281 -0500 -@@ -0,0 +1,312 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Lower/OpenMP/Clauses.h llvm-project-aso/flang/include/flang/Lower/OpenMP/Clauses.h +--- llvm-project-aso-orig/flang/include/flang/Lower/OpenMP/Clauses.h 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/include/flang/Lower/OpenMP/Clauses.h 2024-11-23 20:39:47.176175380 -0600 +@@ -0,0 +1,330 @@ +//===-- Clauses.h -- OpenMP clause handling -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
@@ -3563,6 +3972,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP +#define FORTRAN_LOWER_OPENMP_CLAUSES_H + +#include "flang/Evaluate/expression.h" ++#include "flang/Evaluate/type.h" +#include "flang/Parser/parse-tree.h" +#include "flang/Semantics/expression.h" +#include "flang/Semantics/semantics.h" @@ -3583,12 +3993,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP +using namespace Fortran; +using SomeExpr = semantics::SomeExpr; +using MaybeExpr = semantics::MaybeExpr; -+ -+// evaluate::SomeType doesn't provide == operation. It's not really used in -+// flang's clauses so far, so a trivial implementation is sufficient. -+struct TypeTy : public evaluate::SomeType { -+ bool operator==(const TypeTy &t) const { return true; } -+}; ++using TypeTy = evaluate::DynamicType; + +template +struct IdTyTemplate { @@ -3609,6 +4014,13 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP + return designator == other.designator; + } + ++ // Defining an "ordering" which allows types derived from this to be ++ // utilised in maps and other containers that require comparison ++ // operators for ordering ++ bool operator<(const IdTyTemplate &other) const { ++ return symbol < other.symbol; ++ } ++ + operator bool() const { return symbol != nullptr; } +}; + @@ -3630,6 +4042,10 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP + Fortran::semantics::Symbol *sym() const { return identity.symbol; } + const std::optional &ref() const { return identity.designator; } + ++ bool operator<(const ObjectT &other) const { ++ return identity < other.identity; ++ } ++ + IdTy identity; +}; +} // namespace tomp::type @@ -3704,15 +4120,20 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP + semantics::SemanticsContext &semaCtx); + +namespace clause { ++using Range = tomp::type::RangeT; ++using Iterator = tomp::type::IteratorT; ++using IteratorSpecifier = tomp::type::IteratorSpecifierT; +using DefinedOperator = tomp::type::DefinedOperatorT; +using ProcedureDesignator = tomp::type::ProcedureDesignatorT; +using ReductionOperator = tomp::type::ReductionIdentifierT; ++using DependenceType = tomp::type::DependenceType; + +// "Requires" clauses are handled early on, and the aggregated information +// is stored in the Symbol details of modules, programs, and subprograms. +// These clauses are still handled here to cover all alternatives in the +// main clause variant. 
+ ++using Absent = tomp::clause::AbsentT; +using AcqRel = tomp::clause::AcqRelT; +using Acquire = tomp::clause::AcquireT; +using AdjustArgs = tomp::clause::AdjustArgsT; @@ -3729,6 +4150,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP +using Capture = tomp::clause::CaptureT; +using Collapse = tomp::clause::CollapseT; +using Compare = tomp::clause::CompareT; ++using Contains = tomp::clause::ContainsT; +using Copyin = tomp::clause::CopyinT; +using Copyprivate = tomp::clause::CopyprivateT; +using Defaultmap = tomp::clause::DefaultmapT; @@ -3753,6 +4175,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP +using Grainsize = tomp::clause::GrainsizeT; +using HasDeviceAddr = tomp::clause::HasDeviceAddrT; +using Hint = tomp::clause::HintT; ++using Holds = tomp::clause::HoldsT; +using If = tomp::clause::IfT; +using Inbranch = tomp::clause::InbranchT; +using Inclusive = tomp::clause::InclusiveT; @@ -3767,6 +4190,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP +using Match = tomp::clause::MatchT; +using Mergeable = tomp::clause::MergeableT; +using Message = tomp::clause::MessageT; ++using NoOpenmp = tomp::clause::NoOpenmpT; ++using NoOpenmpRoutines = tomp::clause::NoOpenmpRoutinesT; ++using NoParallelism = tomp::clause::NoParallelismT; +using Nocontext = tomp::clause::NocontextT; +using Nogroup = tomp::clause::NogroupT; +using Nontemporal = tomp::clause::NontemporalT; @@ -3798,6 +4224,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP +using Simdlen = tomp::clause::SimdlenT; +using Simd = tomp::clause::SimdT; +using Sizes = tomp::clause::SizesT; ++using Permutation = tomp::clause::PermutationT; +using TaskReduction = tomp::clause::TaskReductionT; +using ThreadLimit = tomp::clause::ThreadLimitT; +using Threads = tomp::clause::ThreadsT; @@ -3864,10 +4291,10 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP +} // namespace Fortran::lower::omp + +#endif // FORTRAN_LOWER_OPENMP_CLAUSES_H -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP/Utils.h llvm-project/flang/include/flang/Lower/OpenMP/Utils.h ---- llvm-project.orig/flang/include/flang/Lower/OpenMP/Utils.h 1969-12-31 18:00:00.000000000 -0600 -+++ llvm-project/flang/include/flang/Lower/OpenMP/Utils.h 2024-06-12 10:44:09.347614281 -0500 -@@ -0,0 +1,116 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Lower/OpenMP/Utils.h llvm-project-aso/flang/include/flang/Lower/OpenMP/Utils.h +--- llvm-project-aso-orig/flang/include/flang/Lower/OpenMP/Utils.h 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/include/flang/Lower/OpenMP/Utils.h 2024-11-23 20:39:47.176175380 -0600 +@@ -0,0 +1,169 @@ +//===-- Lower/OpenMP/Utils.h ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
@@ -3884,6 +4311,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP +#include "mlir/IR/Location.h" +#include "mlir/IR/Value.h" +#include "llvm/Support/CommandLine.h" ++#include + +extern llvm::cl::opt treatIndexAsSection; +extern llvm::cl::opt enableDelayedPrivatization; @@ -3904,6 +4332,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP +} // namespace parser + +namespace lower { ++class StatementContext; +namespace pft { +struct Evaluation; +} @@ -3919,38 +4348,97 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP +// and index data when lowering OpenMP map clauses. Keeps track of the +// placement of the component in the derived type hierarchy it rests within, +// alongside the generated mlir::omp::MapInfoOp for the mapped component. -+struct OmpMapMemberIndicesData { ++// ++// As an example of what the contents of this data structure may be like, ++// when provided the following derived type and map of that type: ++// ++// type :: bottom_layer ++// real(8) :: i2 ++// real(4) :: array_i2(10) ++// real(4) :: array_j2(10) ++// end type bottom_layer ++// ++// type :: top_layer ++// real(4) :: i ++// integer(4) :: array_i(10) ++// real(4) :: j ++// type(bottom_layer) :: nested ++// integer, allocatable :: array_j(:) ++// integer(4) :: k ++// end type top_layer ++// ++// type(top_layer) :: top_dtype ++// ++// map(tofrom: top_dtype%nested%i2, top_dtype%k, top_dtype%nested%array_i2) ++// ++// We would end up with an OmpMapParentAndMemberData populated like below: ++// ++// memberPlacementIndices: ++// Vector 1: 3, 0 ++// Vector 2: 5 ++// Vector 3: 3, 1 ++// ++// memberMap: ++// Entry 1: omp.map.info for "top_dtype%nested%i2" ++// Entry 2: omp.map.info for "top_dtype%k" ++// Entry 3: omp.map.info for "top_dtype%nested%array_i2" ++// ++// And this OmpMapParentAndMemberData would be accessed via the parent ++// symbol for top_dtype. Other parent derived type instances that have ++// members mapped would have there own OmpMapParentAndMemberData entry ++// accessed via their own symbol. ++struct OmpMapParentAndMemberData { + // The indices representing the component members placement in its derived + // type parents hierarchy. -+ llvm::SmallVector memberPlacementIndices; ++ llvm::SmallVector> memberPlacementIndices; + + // Placement of the member in the member vector. 
-+ mlir::omp::MapInfoOp memberMap; ++ llvm::SmallVector memberMap; ++ ++ bool isDuplicateMemberMapInfo(llvm::SmallVectorImpl &memberIndices) { ++ return llvm::find_if(memberPlacementIndices, [&](auto &memberData) { ++ return llvm::equal(memberIndices, memberData); ++ }) != memberPlacementIndices.end(); ++ } ++ ++ void addChildIndexAndMapToParent(const omp::Object &object, ++ mlir::omp::MapInfoOp &mapOp, ++ semantics::SemanticsContext &semaCtx); +}; + +mlir::omp::MapInfoOp -+createMapInfoOp(mlir::OpBuilder &builder, mlir::Location loc, -+ mlir::Value baseAddr, mlir::Value varPtrPtr, std::string name, -+ mlir::ArrayRef bounds, -+ mlir::ArrayRef members, -+ mlir::DenseIntElementsAttr membersIndex, uint64_t mapType, ++createMapInfoOp(fir::FirOpBuilder &builder, mlir::Location loc, ++ mlir::Value baseAddr, mlir::Value varPtrPtr, ++ llvm::StringRef name, llvm::ArrayRef bounds, ++ llvm::ArrayRef members, ++ mlir::ArrayAttr membersIndex, uint64_t mapType, + mlir::omp::VariableCaptureKind mapCaptureType, mlir::Type retTy, + bool partialMap = false); + -+void addChildIndexAndMapToParent( -+ const omp::Object &object, -+ std::map> &parentMemberIndices, -+ mlir::omp::MapInfoOp &mapOp, semantics::SemanticsContext &semaCtx); -+ +void insertChildMapInfoIntoParent( -+ lower::AbstractConverter &converter, -+ std::map> &parentMemberIndices, ++ Fortran::lower::AbstractConverter &converter, ++ Fortran::semantics::SemanticsContext &semaCtx, ++ Fortran::lower::StatementContext &stmtCtx, ++ std::map &parentMemberIndices, + llvm::SmallVectorImpl &mapOperands, -+ llvm::SmallVectorImpl &mapSyms, -+ llvm::SmallVectorImpl *mapSymTypes, -+ llvm::SmallVectorImpl *mapSymLocs); ++ llvm::SmallVectorImpl &mapSyms); ++ ++void generateMemberPlacementIndices( ++ const Object &object, llvm::SmallVectorImpl &indices, ++ Fortran::semantics::SemanticsContext &semaCtx); ++ ++bool isMemberOrParentAllocatableOrPointer( ++ const Object &object, Fortran::semantics::SemanticsContext &semaCtx); ++ ++mlir::Value createParentSymAndGenIntermediateMaps( ++ mlir::Location clauseLocation, Fortran::lower::AbstractConverter &converter, ++ semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, ++ omp::ObjectList &objectList, llvm::SmallVectorImpl &indices, ++ OmpMapParentAndMemberData &parentMemberIndices, llvm::StringRef asFortran, ++ llvm::omp::OpenMPOffloadMappingFlags mapTypeBits); ++ ++omp::ObjectList gatherObjectsOf(omp::Object derivedTypeMember, ++ semantics::SemanticsContext &semaCtx); + +mlir::Type getLoopVarType(lower::AbstractConverter &converter, + std::size_t loopVarTypeSize); @@ -3964,74 +4452,75 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP + +int64_t getCollapseValue(const List &clauses); + -+semantics::Symbol *getOmpObjectSymbol(const parser::OmpObject &ompObject); -+ +void genObjectList(const ObjectList &objects, + lower::AbstractConverter &converter, + llvm::SmallVectorImpl &operands); + -+// TODO: consider moving this to the `omp.loop_nest` op. 
Would be something like -+// this: -+// -+// ``` -+// mlir::Value LoopNestOp::calculateTripCount(mlir::OpBuilder &builder, -+// mlir::OpBuilder::InsertPoint ip) -+// ``` -+mlir::Value calculateTripCount(fir::FirOpBuilder &builder, mlir::Location loc, -+ const mlir::omp::CollapseClauseOps &ops); ++void lastprivateModifierNotSupported(const omp::clause::Lastprivate &lastp, ++ mlir::Location loc); ++ +} // namespace omp +} // namespace lower +} // namespace Fortran + +#endif // FORTRAN_LOWER_OPENMPUTILS_H -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Optimizer/Transforms/Passes.h llvm-project/flang/include/flang/Optimizer/Transforms/Passes.h ---- llvm-project.orig/flang/include/flang/Optimizer/Transforms/Passes.h 2024-06-12 10:43:12.604210663 -0500 -+++ llvm-project/flang/include/flang/Optimizer/Transforms/Passes.h 2024-06-12 10:44:09.347614281 -0500 -@@ -10,10 +10,12 @@ - #define FORTRAN_OPTIMIZER_TRANSFORMS_PASSES_H - - #include "flang/Optimizer/Dialect/FIROps.h" -+#include "flang/Optimizer/Transforms/Utils.h" - #include "mlir/Dialect/LLVMIR/LLVMAttrs.h" - #include "mlir/Dialect/OpenMP/OpenMPDialect.h" +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h llvm-project-aso/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h +--- llvm-project-aso-orig/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h 2024-11-14 15:28:41.122642523 -0600 ++++ llvm-project-aso/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h 2024-11-23 20:39:47.176175380 -0600 +@@ -67,7 +67,7 @@ + // end subroutine + // ------------------------------------------------- + // +- // flang -fc1 -emit-fir test.f90 -o test.fir ++ // flang-new -fc1 -emit-fir test.f90 -o test.fir + // + // ------------------- test.fir -------------------- + // fir.global @_QMtopEa : !fir.box>> +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Optimizer/OpenMP/Passes.h llvm-project-aso/flang/include/flang/Optimizer/OpenMP/Passes.h +--- llvm-project-aso-orig/flang/include/flang/Optimizer/OpenMP/Passes.h 2024-11-23 20:25:26.831275207 -0600 ++++ llvm-project-aso/flang/include/flang/Optimizer/OpenMP/Passes.h 2024-11-23 20:39:47.176175380 -0600 +@@ -13,6 +13,7 @@ + #ifndef FORTRAN_OPTIMIZER_OPENMP_PASSES_H + #define FORTRAN_OPTIMIZER_OPENMP_PASSES_H + ++#include "flang/Optimizer/OpenMP/Utils.h" + #include "mlir/Dialect/Func/IR/FuncOps.h" + #include "mlir/IR/BuiltinOps.h" #include "mlir/Pass/Pass.h" - #include "mlir/Pass/PassRegistry.h" -+ +@@ -21,6 +22,9 @@ #include - namespace mlir { -@@ -39,6 +41,7 @@ - #define GEN_PASS_DECL_ASSUMEDRANKOPCONVERSION - #define GEN_PASS_DECL_CHARACTERCONVERSION - #define GEN_PASS_DECL_CFGCONVERSION -+#define GEN_PASS_DECL_DOCONCURRENTCONVERSIONPASS - #define GEN_PASS_DECL_EXTERNALNAMECONVERSION - #define GEN_PASS_DECL_MEMREFDATAFLOWOPT - #define GEN_PASS_DECL_SIMPLIFYINTRINSICS -@@ -76,6 +79,8 @@ - std::unique_ptr - createVScaleAttrPass(std::pair vscaleAttr); - + namespace flangomp { ++ +std::unique_ptr createDoConcurrentConversionPass(bool mapToDevice); + - void populateCfgConversionRewrites(mlir::RewritePatternSet &patterns, - bool forceLoopToExecuteOnce = false, - bool setNSW = false); -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Optimizer/Transforms/Passes.td llvm-project/flang/include/flang/Optimizer/Transforms/Passes.td ---- llvm-project.orig/flang/include/flang/Optimizer/Transforms/Passes.td 2024-06-12 10:43:12.604210663 -0500 -+++ 
llvm-project/flang/include/flang/Optimizer/Transforms/Passes.td 2024-06-12 10:44:09.347614281 -0500 -@@ -15,6 +15,7 @@ - #define FLANG_OPTIMIZER_TRANSFORMS_PASSES + #define GEN_PASS_DECL + #define GEN_PASS_REGISTRATION + #include "flang/Optimizer/OpenMP/Passes.h.inc" +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Optimizer/OpenMP/Passes.td llvm-project-aso/flang/include/flang/Optimizer/OpenMP/Passes.td +--- llvm-project-aso-orig/flang/include/flang/Optimizer/OpenMP/Passes.td 2024-11-23 20:25:26.831275207 -0600 ++++ llvm-project-aso/flang/include/flang/Optimizer/OpenMP/Passes.td 2024-11-23 20:39:47.176175380 -0600 +@@ -10,6 +10,7 @@ + #define FORTRAN_OPTIMIZER_OPENMP_PASSES include "mlir/Pass/PassBase.td" +include "mlir/IR/EnumAttr.td" - def AbstractResultOpt - : Pass<"abstract-result"> { -@@ -408,4 +409,35 @@ + def MapInfoFinalizationPass + : Pass<"omp-map-info-finalization", "mlir::ModuleOp"> { +@@ -50,6 +51,46 @@ ]; } ++def GlobalFilteringPass : Pass<"omp-global-filtering"> { ++ let summary = "Filters out globals intended for the host when compiling " ++ "for the target device."; ++ let dependentDialects = [ ++ "mlir::func::FuncDialect", ++ "fir::FIROpsDialect" ++ ]; ++} ++ +def DoConcurrentConversionPass : Pass<"fopenmp-do-concurrent-conversion", "mlir::func::FuncOp"> { + let summary = "Map `DO CONCURRENT` loops to OpenMP worksharing loops."; + @@ -4049,87 +4538,278 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Optimizer/Tr + + let options = [ + Option<"mapTo", "map-to", -+ "fir::omp::DoConcurrentMappingKind", -+ /*default=*/"fir::omp::DoConcurrentMappingKind::DCMK_None", ++ "flangomp::DoConcurrentMappingKind", ++ /*default=*/"flangomp::DoConcurrentMappingKind::DCMK_None", + "Try to map `do concurrent` loops to OpenMP (on host or device)", + [{::llvm::cl::values( -+ clEnumValN(fir::omp::DoConcurrentMappingKind::DCMK_None, ++ clEnumValN(flangomp::DoConcurrentMappingKind::DCMK_None, + "none", "Do not lower `do concurrent` to OpenMP"), -+ clEnumValN(fir::omp::DoConcurrentMappingKind::DCMK_Host, ++ clEnumValN(flangomp::DoConcurrentMappingKind::DCMK_Host, + "host", "Lower to run in parallel on the CPU"), -+ clEnumValN(fir::omp::DoConcurrentMappingKind::DCMK_Device, ++ clEnumValN(flangomp::DoConcurrentMappingKind::DCMK_Device, + "device", "Lower to run in parallel on the GPU") + )}]>, + ]; +} + - #endif // FLANG_OPTIMIZER_TRANSFORMS_PASSES -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Optimizer/Transforms/Utils.h llvm-project/flang/include/flang/Optimizer/Transforms/Utils.h ---- llvm-project.orig/flang/include/flang/Optimizer/Transforms/Utils.h 2024-06-12 10:43:12.604210663 -0500 -+++ llvm-project/flang/include/flang/Optimizer/Transforms/Utils.h 2024-06-12 10:44:09.347614281 -0500 -@@ -13,8 +13,13 @@ - #ifndef FORTRAN_OPTIMIZER_TRANSFORMS_UTILS_H - #define FORTRAN_OPTIMIZER_TRANSFORMS_UTILS_H - -+#include "mlir/IR/Location.h" -+#include "mlir/IR/Value.h" -+ - namespace fir { - -+class FirOpBuilder; + // Needs to be scheduled on Module as we create functions in it + def LowerWorkshare : Pass<"lower-workshare", "::mlir::ModuleOp"> { + let summary = "Lower workshare construct"; +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Optimizer/OpenMP/Utils.h llvm-project-aso/flang/include/flang/Optimizer/OpenMP/Utils.h +--- llvm-project-aso-orig/flang/include/flang/Optimizer/OpenMP/Utils.h 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/include/flang/Optimizer/OpenMP/Utils.h 2024-11-23 
20:39:47.176175380 -0600 +@@ -0,0 +1,26 @@ ++//===-- Optimizer/OpenMP/Utils.h --------------------------------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/ ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef FORTRAN_OPTIMIZER_OPENMP_UTILS_H ++#define FORTRAN_OPTIMIZER_OPENMP_UTILS_H ++ ++namespace flangomp { + - using MinlocBodyOpGeneratorTy = llvm::function_ref &)>; -@@ -33,6 +38,13 @@ - mlir::Type maskElemType, mlir::Value resultArr, - bool maskMayBeLogicalScalar); - -+namespace omp { +enum class DoConcurrentMappingKind { -+ DCMK_None, // Do not lower `do concurrent` to OpenMP. -+ DCMK_Host, // Lower to run in parallel on the CPU. -+ DCMK_Device // Lower to run in parallel on the GPU. ++ DCMK_None, ///< Do not lower `do concurrent` to OpenMP. ++ DCMK_Host, ///< Lower to run in parallel on the CPU. ++ DCMK_Device ///< Lower to run in parallel on the GPU. +}; -+} - } // namespace fir - - #endif // FORTRAN_OPTIMIZER_TRANSFORMS_UTILS_H -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Tools/CLOptions.inc llvm-project/flang/include/flang/Tools/CLOptions.inc ---- llvm-project.orig/flang/include/flang/Tools/CLOptions.inc 2024-06-12 10:43:12.608210621 -0500 -+++ llvm-project/flang/include/flang/Tools/CLOptions.inc 2024-06-12 10:44:09.347614281 -0500 -@@ -332,6 +332,9 @@ - pm.addPass(hlfir::createConvertHLFIRtoFIR()); - } ++ ++} // namespace flangomp ++ ++#endif // FORTRAN_OPTIMIZER_OPENMP_UTILS_H +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Optimizer/Passes/Pipelines.h llvm-project-aso/flang/include/flang/Optimizer/Passes/Pipelines.h +--- llvm-project-aso-orig/flang/include/flang/Optimizer/Passes/Pipelines.h 2024-11-23 20:25:26.831275207 -0600 ++++ llvm-project-aso/flang/include/flang/Optimizer/Passes/Pipelines.h 2024-11-23 20:39:47.176175380 -0600 +@@ -126,6 +126,15 @@ + mlir::PassManager &pm, bool enableOpenMP, + llvm::OptimizationLevel optLevel = defaultOptLevel); +using DoConcurrentMappingKind = + Fortran::frontend::CodeGenOptions::DoConcurrentMappingKind; ++ ++struct OpenMPFIRPassPipelineOpts { ++ bool isTargetDevice; ++ bool enableOffloadGlobalFiltering; ++ DoConcurrentMappingKind doConcurrentMappingKind; ++}; + /// Create a pass pipeline for handling certain OpenMP transformations needed /// prior to FIR lowering. /// -@@ -341,10 +344,15 @@ +@@ -135,7 +144,8 @@ /// \param pm - MLIR pass manager that will hold the pipeline definition. /// \param isTargetDevice - Whether code is being generated for a target device /// rather than the host device. 
--inline void createOpenMPFIRPassPipeline( -- mlir::PassManager &pm, bool isTargetDevice) { -+inline void createOpenMPFIRPassPipeline(mlir::PassManager &pm, -+ bool isTargetDevice, DoConcurrentMappingKind doConcurrentMappingKind) { -+ if (doConcurrentMappingKind != DoConcurrentMappingKind::DCMK_None) -+ pm.addPass(fir::createDoConcurrentConversionPass( -+ doConcurrentMappingKind == DoConcurrentMappingKind::DCMK_Device)); -+ - addNestedPassToAllTopLevelOperations( - pm, fir::createOMPMapInfoFinalizationPass); -+ - pm.addPass(fir::createOMPMarkDeclareTargetPass()); - if (isTargetDevice) - pm.addPass(fir::createOMPFunctionFiltering()); -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Frontend/CompilerInvocation.cpp llvm-project/flang/lib/Frontend/CompilerInvocation.cpp ---- llvm-project.orig/flang/lib/Frontend/CompilerInvocation.cpp 2024-06-12 10:43:12.612210579 -0500 -+++ llvm-project/flang/lib/Frontend/CompilerInvocation.cpp 2024-06-12 10:44:09.347614281 -0500 -@@ -155,6 +155,32 @@ +-void createOpenMPFIRPassPipeline(mlir::PassManager &pm, bool isTargetDevice); ++void createOpenMPFIRPassPipeline(mlir::PassManager &pm, ++ OpenMPFIRPassPipelineOpts opts); + + #if !defined(FLANG_EXCLUDE_CODEGEN) + void createDebugPasses(mlir::PassManager &pm, +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Optimizer/Transforms/Passes.h llvm-project-aso/flang/include/flang/Optimizer/Transforms/Passes.h +--- llvm-project-aso-orig/flang/include/flang/Optimizer/Transforms/Passes.h 2024-10-29 11:07:19.325635688 -0500 ++++ llvm-project-aso/flang/include/flang/Optimizer/Transforms/Passes.h 2024-11-23 20:39:47.176175380 -0600 +@@ -10,10 +10,12 @@ + #define FORTRAN_OPTIMIZER_TRANSFORMS_PASSES_H + + #include "flang/Optimizer/Dialect/FIROps.h" ++#include "flang/Optimizer/Transforms/Utils.h" + #include "mlir/Dialect/LLVMIR/LLVMAttrs.h" + #include "mlir/Dialect/OpenMP/OpenMPDialect.h" + #include "mlir/Pass/Pass.h" + #include "mlir/Pass/PassRegistry.h" ++ + #include + + namespace mlir { +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Optimizer/Transforms/Utils.h llvm-project-aso/flang/include/flang/Optimizer/Transforms/Utils.h +--- llvm-project-aso-orig/flang/include/flang/Optimizer/Transforms/Utils.h 2024-08-27 20:36:25.188173519 -0500 ++++ llvm-project-aso/flang/include/flang/Optimizer/Transforms/Utils.h 2024-11-23 20:39:47.176175380 -0600 +@@ -13,8 +13,13 @@ + #ifndef FORTRAN_OPTIMIZER_TRANSFORMS_UTILS_H + #define FORTRAN_OPTIMIZER_TRANSFORMS_UTILS_H + ++#include "mlir/IR/Location.h" ++#include "mlir/IR/Value.h" ++ + namespace fir { + ++class FirOpBuilder; ++ + using MinlocBodyOpGeneratorTy = llvm::function_ref &)>; +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Runtime/allocator-registry.h llvm-project-aso/flang/include/flang/Runtime/allocator-registry.h +--- llvm-project-aso-orig/flang/include/flang/Runtime/allocator-registry.h 2024-10-18 17:40:32.476992577 -0500 ++++ llvm-project-aso/flang/include/flang/Runtime/allocator-registry.h 2024-11-23 20:39:47.176175380 -0600 +@@ -13,6 +13,8 @@ + #include + #include + ++RT_OFFLOAD_VAR_GROUP_BEGIN ++ + static constexpr unsigned kDefaultAllocator = 0; + + // Allocator used for CUF +@@ -21,6 +23,8 @@ + static constexpr unsigned kManagedAllocatorPos = 3; + static constexpr unsigned kUnifiedAllocatorPos = 4; + ++RT_OFFLOAD_VAR_GROUP_END ++ + #define MAX_ALLOCATOR 7 // 3 bits are reserved in the descriptor. 
+ + namespace Fortran::runtime { +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Runtime/freestanding-tools.h llvm-project-aso/flang/include/flang/Runtime/freestanding-tools.h +--- llvm-project-aso-orig/flang/include/flang/Runtime/freestanding-tools.h 2024-11-23 20:25:26.835275192 -0600 ++++ llvm-project-aso/flang/include/flang/Runtime/freestanding-tools.h 2024-11-23 20:39:47.176175380 -0600 +@@ -23,6 +23,16 @@ + #define STD_FILL_N_UNSUPPORTED 1 + #endif + ++#if !defined(STD_MEMSET_UNSUPPORTED) && \ ++ (defined(__CUDACC__) || defined(__CUDA__)) && defined(__CUDA_ARCH__) ++#define STD_MEMSET_UNSUPPORTED 1 ++#endif ++ ++#if !defined(STD_MEMCPY_UNSUPPORTED) && \ ++ (defined(__CUDACC__) || defined(__CUDA__)) && defined(__CUDA_ARCH__) ++#define STD_MEMCPY_UNSUPPORTED 1 ++#endif ++ + #if !defined(STD_MEMMOVE_UNSUPPORTED) && \ + (defined(__CUDACC__) || defined(__CUDA__)) && defined(__CUDA_ARCH__) + #define STD_MEMMOVE_UNSUPPORTED 1 +@@ -63,6 +73,25 @@ + #define STD_TOUPPER_UNSUPPORTED 1 + #endif + ++#if defined(OMP_OFFLOAD_BUILD) || defined(OMP_NOHOST_BUILD) ++// #pragma message "Using replacements for unsupported std functions" ++#define STD_FILL_N_UNSUPPORTED 1 ++#define STD_MEMSET_USE_BUILTIN 1 ++#define STD_MEMSET_UNSUPPORTED 1 ++#define STD_MEMCPY_USE_BUILTIN 1 ++#define STD_MEMCPY_UNSUPPORTED 1 ++// #define STD_MEMMOVE_USE_BUILTIN 1 // address now taken in assign.h ++#define STD_MEMMOVE_UNSUPPORTED 1 ++// #define STD_STRLEN_USE_BUILTIN 1 // still resolves to strlen ++#define STD_STRLEN_UNSUPPORTED 1 ++#define STD_MEMCMP_UNSUPPORTED 1 ++#define STD_REALLOC_UNSUPPORTED 1 ++#define STD_MEMCHR_UNSUPPORTED 1 ++#define STD_STRCPY_UNSUPPORTED 1 ++#define STD_STRCMP_UNSUPPORTED 1 ++#define STD_TOUPPER_UNSUPPORTED 1 ++#endif ++ + namespace Fortran::runtime { + + #if STD_FILL_N_UNSUPPORTED +@@ -79,7 +108,52 @@ + using std::fill_n; + #endif // !STD_FILL_N_UNSUPPORTED + +-#if STD_MEMMOVE_UNSUPPORTED ++#if STD_MEMSET_USE_BUILTIN ++static inline RT_API_ATTRS void memset( ++ void *dest, uint8_t value, std::size_t count) { ++ __builtin_memset(dest, value, count); ++} ++#elif STD_MEMSET_UNSUPPORTED ++static inline RT_API_ATTRS void memset( ++ void *dest, uint8_t value, std::size_t count) { ++ char *to{reinterpret_cast(dest)}; ++ while (count--) { ++ *to++ = value; ++ } ++ return; ++} ++#else ++using std::memset; ++#endif ++ ++#if STD_MEMCPY_USE_BUILTIN ++static inline RT_API_ATTRS void memcpy( ++ void *dest, const void *src, std::size_t count) { ++ __builtin_memcpy(dest, src, count); ++} ++#elif STD_MEMCPY_UNSUPPORTED ++static inline RT_API_ATTRS void memcpy( ++ void *dest, const void *src, std::size_t count) { ++ char *to{reinterpret_cast(dest)}; ++ const char *from{reinterpret_cast(src)}; ++ if (to == from) { ++ return; ++ } ++ while (count--) { ++ *to++ = *from++; ++ } ++ return; ++} ++#else ++using std::memcpy; ++#endif ++ ++#if STD_MEMMOVE_USE_BUILTIN ++static inline RT_API_ATTRS void memmove( ++ void *dest, const void *src, std::size_t count) { ++ __builtin_memmove(dest, src, count); ++} ++#elif STD_MEMMOVE_UNSUPPORTED + // Provides alternative implementation for std::memmove(), if + // it is not supported. 
+ static inline RT_API_ATTRS void *memmove( +@@ -88,10 +162,10 @@ + const char *from{reinterpret_cast(src)}; + + if (to == from) { +- return; ++ return dest; + } + if (to + count <= from || from + count <= to) { +- std::memcpy(dest, src, count); ++ memcpy(dest, src, count); + } else if (to < from) { + while (count--) { + *to++ = *from++; +@@ -118,7 +192,11 @@ + } + #endif + +-#if STD_STRLEN_UNSUPPORTED ++#if STD_STRLEN_USE_BUILTIN ++static inline RT_API_ATTRS std::size_t strlen(const char *str) { ++ return __builtin_strlen(str); ++} ++#elif STD_STRLEN_UNSUPPORTED + // Provides alternative implementation for std::strlen(), if + // it is not supported. + static inline RT_API_ATTRS std::size_t strlen(const char *str) { +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Tools/CrossToolHelpers.h llvm-project-aso/flang/include/flang/Tools/CrossToolHelpers.h +--- llvm-project-aso-orig/flang/include/flang/Tools/CrossToolHelpers.h 2024-11-23 20:25:26.835275192 -0600 ++++ llvm-project-aso/flang/include/flang/Tools/CrossToolHelpers.h 2024-11-23 20:39:47.176175380 -0600 +@@ -165,7 +165,7 @@ + bool OpenMPIsTargetDevice = false; + bool OpenMPIsGPU = false; + bool OpenMPForceUSM = false; +- uint32_t OpenMPVersion = 11; ++ uint32_t OpenMPVersion = 52; + std::string OMPHostIRFile = {}; + std::vector OMPTargetTriples = {}; + bool NoGPULib = false; +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Frontend/CompilerInvocation.cpp llvm-project-aso/flang/lib/Frontend/CompilerInvocation.cpp +--- llvm-project-aso-orig/flang/lib/Frontend/CompilerInvocation.cpp 2024-11-23 20:25:26.835275192 -0600 ++++ llvm-project-aso/flang/lib/Frontend/CompilerInvocation.cpp 2024-11-23 20:39:47.180175366 -0600 +@@ -157,6 +157,32 @@ return true; } @@ -4162,34 +4842,63 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Frontend/CompilerInvoc static bool parseVectorLibArg(Fortran::frontend::CodeGenOptions &opts, llvm::opt::ArgList &args, clang::DiagnosticsEngine &diags) { -@@ -386,6 +412,8 @@ +@@ -422,6 +448,13 @@ clang::driver::options::OPT_funderscoring, false)) { opts.Underscoring = 0; } + ++ if (args.hasFlag(clang::driver::options::OPT_fno_offload_global_filtering, ++ clang::driver::options::OPT_foffload_global_filtering, false)) { ++ opts.OffloadGlobalFiltering = 0; ++ } ++ + parseDoConcurrentMapping(opts, args, diags); } /// Parses all target input arguments and populates the target -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Frontend/FrontendActions.cpp llvm-project/flang/lib/Frontend/FrontendActions.cpp ---- llvm-project.orig/flang/lib/Frontend/FrontendActions.cpp 2024-06-12 10:43:12.612210579 -0500 -+++ llvm-project/flang/lib/Frontend/FrontendActions.cpp 2024-06-12 10:44:09.347614281 -0500 -@@ -320,16 +320,34 @@ +@@ -1014,8 +1047,8 @@ + unsigned numErrorsBefore = diags.getNumErrors(); + llvm::Triple t(res.getTargetOpts().triple); + +- // By default OpenMP is set to 1.1 version +- res.getLangOpts().OpenMPVersion = 11; ++ // By default OpenMP is set to 5.2 version ++ res.getLangOpts().OpenMPVersion = 52; + res.getFrontendOpts().features.Enable( + Fortran::common::LanguageFeature::OpenMP); + if (int Version = getLastArgIntValue( +@@ -1483,6 +1516,7 @@ + auto &fortranOptions = getFortranOpts(); + const auto &frontendOptions = getFrontendOpts(); + // Populate the macro list with version numbers and other predefinitions. 
++ fortranOptions.predefinitions.emplace_back("__amdflang__", "1"); + fortranOptions.predefinitions.emplace_back("__flang__", "1"); + fortranOptions.predefinitions.emplace_back("__flang_major__", + FLANG_VERSION_MAJOR_STRING); +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Frontend/FrontendActions.cpp llvm-project-aso/flang/lib/Frontend/FrontendActions.cpp +--- llvm-project-aso-orig/flang/lib/Frontend/FrontendActions.cpp 2024-11-23 20:25:26.835275192 -0600 ++++ llvm-project-aso/flang/lib/Frontend/FrontendActions.cpp 2024-11-23 20:39:47.180175366 -0600 +@@ -330,16 +330,38 @@ // Add OpenMP-related passes // WARNING: These passes must be run immediately after the lowering to ensure // that the FIR is correct with respect to OpenMP operations/attributes. - if (ci.getInvocation().getFrontendOpts().features.IsEnabled( - Fortran::common::LanguageFeature::OpenMP)) { +- bool isDevice = false; + bool isOpenMPEnabled = + ci.getInvocation().getFrontendOpts().features.IsEnabled( + Fortran::common::LanguageFeature::OpenMP); + ++ fir::OpenMPFIRPassPipelineOpts opts; ++ + using DoConcurrentMappingKind = + Fortran::frontend::CodeGenOptions::DoConcurrentMappingKind; -+ DoConcurrentMappingKind doConcurrentMappingKind = ++ opts.doConcurrentMappingKind = + ci.getInvocation().getCodeGenOpts().getDoConcurrentMapping(); ++ opts.enableOffloadGlobalFiltering = ++ ci.getInvocation().getCodeGenOpts().OffloadGlobalFiltering; + -+ if (doConcurrentMappingKind != DoConcurrentMappingKind::DCMK_None && ++ if (opts.doConcurrentMappingKind != DoConcurrentMappingKind::DCMK_None && + !isOpenMPEnabled) { + unsigned diagID = ci.getDiagnostics().getCustomDiagID( + clang::DiagnosticsEngine::Warning, @@ -4199,22 +4908,38 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Frontend/FrontendActio + } + + if (isOpenMPEnabled) { - bool isDevice = false; ++ opts.isTargetDevice = false; if (auto offloadMod = llvm::dyn_cast( mlirModule->getOperation())) - isDevice = offloadMod.getIsTargetDevice(); +- isDevice = offloadMod.getIsTargetDevice(); ++ opts.isTargetDevice = offloadMod.getIsTargetDevice(); + // WARNING: This pipeline must be run immediately after the lowering to // ensure that the FIR is correct with respect to OpenMP operations/ // attributes. - fir::createOpenMPFIRPassPipeline(pm, isDevice); -+ fir::createOpenMPFIRPassPipeline(pm, isDevice, doConcurrentMappingKind); ++ fir::createOpenMPFIRPassPipeline(pm, opts); } pm.enableVerifier(/*verifyPasses=*/true); -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/ClauseProcessor.cpp llvm-project/flang/lib/Lower/OpenMP/ClauseProcessor.cpp ---- llvm-project.orig/flang/lib/Lower/OpenMP/ClauseProcessor.cpp 2024-06-12 10:43:12.620210495 -0500 -+++ llvm-project/flang/lib/Lower/OpenMP/ClauseProcessor.cpp 2024-06-12 10:44:09.347614281 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Lower/DirectivesCommon.h llvm-project-aso/flang/lib/Lower/DirectivesCommon.h +--- llvm-project-aso-orig/flang/lib/Lower/DirectivesCommon.h 2024-11-06 08:35:35.199250155 -0600 ++++ llvm-project-aso/flang/lib/Lower/DirectivesCommon.h 2024-11-23 20:39:47.180175366 -0600 +@@ -984,7 +984,10 @@ + // If it is a scalar subscript, then the upper bound + // is equal to the lower bound, and the extent is one. 
+ ubound = lbound; +- extent = one; ++ if (treatIndexAsSection) ++ extent = fir::factory::readExtent(builder, loc, dataExv, dimension); ++ else ++ extent = one; + } else { + asFortran << ':'; + Fortran::semantics::MaybeExpr upper = +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Lower/OpenMP/ClauseProcessor.cpp llvm-project-aso/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +--- llvm-project-aso-orig/flang/lib/Lower/OpenMP/ClauseProcessor.cpp 2024-11-23 20:25:26.839275178 -0600 ++++ llvm-project-aso/flang/lib/Lower/OpenMP/ClauseProcessor.cpp 2024-11-23 20:39:47.180175366 -0600 @@ -11,8 +11,8 @@ //===----------------------------------------------------------------------===// @@ -4225,9 +4950,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/ClausePro #include "flang/Lower/PFTBuilder.h" #include "flang/Parser/tools.h" #include "flang/Semantics/tools.h" -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/ClauseProcessor.h llvm-project/flang/lib/Lower/OpenMP/ClauseProcessor.h ---- llvm-project.orig/flang/lib/Lower/OpenMP/ClauseProcessor.h 2024-06-12 10:43:12.620210495 -0500 -+++ llvm-project/flang/lib/Lower/OpenMP/ClauseProcessor.h 2024-06-12 10:44:09.347614281 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Lower/OpenMP/ClauseProcessor.h llvm-project-aso/flang/lib/Lower/OpenMP/ClauseProcessor.h +--- llvm-project-aso-orig/flang/lib/Lower/OpenMP/ClauseProcessor.h 2024-11-23 20:25:26.839275178 -0600 ++++ llvm-project-aso/flang/lib/Lower/OpenMP/ClauseProcessor.h 2024-11-23 20:39:47.180175366 -0600 @@ -12,12 +12,12 @@ #ifndef FORTRAN_LOWER_CLAUSEPROCESSOR_H #define FORTRAN_LOWER_CLAUSEPROCESSOR_H @@ -4243,9 +4968,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/ClausePro #include "flang/Optimizer/Builder/Todo.h" #include "flang/Parser/dump-parse-tree.h" #include "flang/Parser/parse-tree.h" -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Clauses.cpp llvm-project/flang/lib/Lower/OpenMP/Clauses.cpp ---- llvm-project.orig/flang/lib/Lower/OpenMP/Clauses.cpp 2024-06-12 10:43:12.620210495 -0500 -+++ llvm-project/flang/lib/Lower/OpenMP/Clauses.cpp 2024-06-12 10:44:09.347614281 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Lower/OpenMP/Clauses.cpp llvm-project-aso/flang/lib/Lower/OpenMP/Clauses.cpp +--- llvm-project-aso-orig/flang/lib/Lower/OpenMP/Clauses.cpp 2024-11-23 20:25:26.839275178 -0600 ++++ llvm-project-aso/flang/lib/Lower/OpenMP/Clauses.cpp 2024-11-23 20:39:47.180175366 -0600 @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// @@ -4255,335 +4980,20 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Clauses.c #include "flang/Common/idioms.h" #include "flang/Evaluate/expression.h" -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Clauses.h llvm-project/flang/lib/Lower/OpenMP/Clauses.h ---- llvm-project.orig/flang/lib/Lower/OpenMP/Clauses.h 2024-06-12 10:43:12.620210495 -0500 -+++ llvm-project/flang/lib/Lower/OpenMP/Clauses.h 1969-12-31 18:00:00.000000000 -0600 -@@ -1,312 +0,0 @@ --//===-- Clauses.h -- OpenMP clause handling -------------------------------===// --// --// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. --// See https://llvm.org/LICENSE.txt for license information. 
--// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception --// --//===----------------------------------------------------------------------===// --#ifndef FORTRAN_LOWER_OPENMP_CLAUSES_H --#define FORTRAN_LOWER_OPENMP_CLAUSES_H -- --#include "flang/Evaluate/expression.h" --#include "flang/Parser/parse-tree.h" --#include "flang/Semantics/expression.h" --#include "flang/Semantics/semantics.h" --#include "flang/Semantics/symbol.h" -- --#include "llvm/ADT/STLExtras.h" --#include "llvm/Frontend/OpenMP/ClauseT.h" -- --#include --#include --#include -- --namespace Fortran::semantics { --class Symbol; --} -- --namespace Fortran::lower::omp { --using namespace Fortran; --using SomeExpr = semantics::SomeExpr; --using MaybeExpr = semantics::MaybeExpr; -- --// evaluate::SomeType doesn't provide == operation. It's not really used in --// flang's clauses so far, so a trivial implementation is sufficient. --struct TypeTy : public evaluate::SomeType { -- bool operator==(const TypeTy &t) const { return true; } --}; -- --template --struct IdTyTemplate { -- // "symbol" is always non-null for id's of actual objects. -- Fortran::semantics::Symbol *symbol; -- std::optional designator; -- -- bool operator==(const IdTyTemplate &other) const { -- // If symbols are different, then the objects are different. -- if (symbol != other.symbol) -- return false; -- if (symbol == nullptr) -- return true; -- // Equal symbols don't necessarily indicate identical objects, -- // for example, a derived object component may use a single symbol, -- // which will refer to different objects for different designators, -- // e.g. a%c and b%c. -- return designator == other.designator; -- } -- -- operator bool() const { return symbol != nullptr; } --}; -- --using ExprTy = SomeExpr; -- --template --using List = tomp::ListT; --} // namespace Fortran::lower::omp -- --// Specialization of the ObjectT template --namespace tomp::type { --template <> --struct ObjectT, -- Fortran::lower::omp::ExprTy> { -- using IdTy = Fortran::lower::omp::IdTyTemplate; -- using ExprTy = Fortran::lower::omp::ExprTy; -- -- IdTy id() const { return identity; } -- Fortran::semantics::Symbol *sym() const { return identity.symbol; } -- const std::optional &ref() const { return identity.designator; } -- -- IdTy identity; --}; --} // namespace tomp::type -- --namespace Fortran::lower::omp { --using IdTy = IdTyTemplate; --} -- --namespace std { --template <> --struct hash { -- size_t operator()(const Fortran::lower::omp::IdTy &id) const { -- return static_cast(reinterpret_cast(id.symbol)); -- } --}; --} // namespace std -- --namespace Fortran::lower::omp { --using Object = tomp::ObjectT; --using ObjectList = tomp::ObjectListT; -- --Object makeObject(const parser::OmpObject &object, -- semantics::SemanticsContext &semaCtx); --Object makeObject(const parser::Name &name, -- semantics::SemanticsContext &semaCtx); --Object makeObject(const parser::Designator &dsg, -- semantics::SemanticsContext &semaCtx); --Object makeObject(const parser::StructureComponent &comp, -- semantics::SemanticsContext &semaCtx); -- --inline auto makeObjectFn(semantics::SemanticsContext &semaCtx) { -- return [&](auto &&s) { return makeObject(s, semaCtx); }; --} -- --template --SomeExpr makeExpr(T &&pftExpr, semantics::SemanticsContext &semaCtx) { -- auto maybeExpr = evaluate::ExpressionAnalyzer(semaCtx).Analyze(pftExpr); -- assert(maybeExpr); -- return std::move(*maybeExpr); --} -- --inline auto makeExprFn(semantics::SemanticsContext &semaCtx) { -- return [&](auto &&s) { return makeExpr(s, 
semaCtx); }; --} -- --template < -- typename ContainerTy, typename FunctionTy, -- typename ElemTy = typename llvm::remove_cvref_t::value_type, -- typename ResultTy = std::invoke_result_t> --List makeList(ContainerTy &&container, FunctionTy &&func) { -- List v; -- llvm::transform(container, std::back_inserter(v), func); -- return v; --} -- --inline ObjectList makeObjects(const parser::OmpObjectList &objects, -- semantics::SemanticsContext &semaCtx) { -- return makeList(objects.v, makeObjectFn(semaCtx)); --} -- --template > --std::optional maybeApply(FuncTy &&func, -- const std::optional &arg) { -- if (!arg) -- return std::nullopt; -- return std::move(func(*arg)); --} -- --std::optional getBaseObject(const Object &object, -- semantics::SemanticsContext &semaCtx); -- --namespace clause { --using DefinedOperator = tomp::type::DefinedOperatorT; --using ProcedureDesignator = tomp::type::ProcedureDesignatorT; --using ReductionOperator = tomp::type::ReductionIdentifierT; -- --// "Requires" clauses are handled early on, and the aggregated information --// is stored in the Symbol details of modules, programs, and subprograms. --// These clauses are still handled here to cover all alternatives in the --// main clause variant. -- --using AcqRel = tomp::clause::AcqRelT; --using Acquire = tomp::clause::AcquireT; --using AdjustArgs = tomp::clause::AdjustArgsT; --using Affinity = tomp::clause::AffinityT; --using Aligned = tomp::clause::AlignedT; --using Align = tomp::clause::AlignT; --using Allocate = tomp::clause::AllocateT; --using Allocator = tomp::clause::AllocatorT; --using AppendArgs = tomp::clause::AppendArgsT; --using AtomicDefaultMemOrder = -- tomp::clause::AtomicDefaultMemOrderT; --using At = tomp::clause::AtT; --using Bind = tomp::clause::BindT; --using Capture = tomp::clause::CaptureT; --using Collapse = tomp::clause::CollapseT; --using Compare = tomp::clause::CompareT; --using Copyin = tomp::clause::CopyinT; --using Copyprivate = tomp::clause::CopyprivateT; --using Defaultmap = tomp::clause::DefaultmapT; --using Default = tomp::clause::DefaultT; --using Depend = tomp::clause::DependT; --using Destroy = tomp::clause::DestroyT; --using Detach = tomp::clause::DetachT; --using Device = tomp::clause::DeviceT; --using DeviceType = tomp::clause::DeviceTypeT; --using DistSchedule = tomp::clause::DistScheduleT; --using Doacross = tomp::clause::DoacrossT; --using DynamicAllocators = -- tomp::clause::DynamicAllocatorsT; --using Enter = tomp::clause::EnterT; --using Exclusive = tomp::clause::ExclusiveT; --using Fail = tomp::clause::FailT; --using Filter = tomp::clause::FilterT; --using Final = tomp::clause::FinalT; --using Firstprivate = tomp::clause::FirstprivateT; --using From = tomp::clause::FromT; --using Full = tomp::clause::FullT; --using Grainsize = tomp::clause::GrainsizeT; --using HasDeviceAddr = tomp::clause::HasDeviceAddrT; --using Hint = tomp::clause::HintT; --using If = tomp::clause::IfT; --using Inbranch = tomp::clause::InbranchT; --using Inclusive = tomp::clause::InclusiveT; --using Indirect = tomp::clause::IndirectT; --using Init = tomp::clause::InitT; --using InReduction = tomp::clause::InReductionT; --using IsDevicePtr = tomp::clause::IsDevicePtrT; --using Lastprivate = tomp::clause::LastprivateT; --using Linear = tomp::clause::LinearT; --using Link = tomp::clause::LinkT; --using Map = tomp::clause::MapT; --using Match = tomp::clause::MatchT; --using Mergeable = tomp::clause::MergeableT; --using Message = tomp::clause::MessageT; --using Nocontext = tomp::clause::NocontextT; --using 
Nogroup = tomp::clause::NogroupT; --using Nontemporal = tomp::clause::NontemporalT; --using Notinbranch = tomp::clause::NotinbranchT; --using Novariants = tomp::clause::NovariantsT; --using Nowait = tomp::clause::NowaitT; --using NumTasks = tomp::clause::NumTasksT; --using NumTeams = tomp::clause::NumTeamsT; --using NumThreads = tomp::clause::NumThreadsT; --using OmpxAttribute = tomp::clause::OmpxAttributeT; --using OmpxBare = tomp::clause::OmpxBareT; --using OmpxDynCgroupMem = tomp::clause::OmpxDynCgroupMemT; --using Ordered = tomp::clause::OrderedT; --using Order = tomp::clause::OrderT; --using Partial = tomp::clause::PartialT; --using Priority = tomp::clause::PriorityT; --using Private = tomp::clause::PrivateT; --using ProcBind = tomp::clause::ProcBindT; --using Read = tomp::clause::ReadT; --using Reduction = tomp::clause::ReductionT; --using Relaxed = tomp::clause::RelaxedT; --using Release = tomp::clause::ReleaseT; --using ReverseOffload = tomp::clause::ReverseOffloadT; --using Safelen = tomp::clause::SafelenT; --using Schedule = tomp::clause::ScheduleT; --using SeqCst = tomp::clause::SeqCstT; --using Severity = tomp::clause::SeverityT; --using Shared = tomp::clause::SharedT; --using Simdlen = tomp::clause::SimdlenT; --using Simd = tomp::clause::SimdT; --using Sizes = tomp::clause::SizesT; --using TaskReduction = tomp::clause::TaskReductionT; --using ThreadLimit = tomp::clause::ThreadLimitT; --using Threads = tomp::clause::ThreadsT; --using To = tomp::clause::ToT; --using UnifiedAddress = tomp::clause::UnifiedAddressT; --using UnifiedSharedMemory = -- tomp::clause::UnifiedSharedMemoryT; --using Uniform = tomp::clause::UniformT; --using Unknown = tomp::clause::UnknownT; --using Untied = tomp::clause::UntiedT; --using Update = tomp::clause::UpdateT; --using UseDeviceAddr = tomp::clause::UseDeviceAddrT; --using UseDevicePtr = tomp::clause::UseDevicePtrT; --using UsesAllocators = tomp::clause::UsesAllocatorsT; --using Use = tomp::clause::UseT; --using Weak = tomp::clause::WeakT; --using When = tomp::clause::WhenT; --using Write = tomp::clause::WriteT; --} // namespace clause -- --using tomp::type::operator==; -- --struct CancellationConstructType { -- using EmptyTrait = std::true_type; --}; --struct Depobj { -- using EmptyTrait = std::true_type; --}; --struct Flush { -- using EmptyTrait = std::true_type; --}; --struct MemoryOrder { -- using EmptyTrait = std::true_type; --}; --struct Threadprivate { -- using EmptyTrait = std::true_type; --}; -- --using ClauseBase = tomp::ClauseT; -- --struct Clause : public ClauseBase { -- Clause(ClauseBase &&base, const parser::CharBlock source = {}) -- : ClauseBase(std::move(base)), source(source) {} -- // "source" will be ignored by tomp::type::operator==. 
-- parser::CharBlock source; --}; -- --template --Clause makeClause(llvm::omp::Clause id, Specific &&specific, -- parser::CharBlock source = {}) { -- return Clause(typename Clause::BaseT{id, specific}, source); --} -- --Clause makeClause(const parser::OmpClause &cls, -- semantics::SemanticsContext &semaCtx); -- --List makeClauses(const parser::OmpClauseList &clauses, -- semantics::SemanticsContext &semaCtx); -- --bool transferLocations(const List &from, List &to); --} // namespace Fortran::lower::omp -- --#endif // FORTRAN_LOWER_OPENMP_CLAUSES_H -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp llvm-project/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp ---- llvm-project.orig/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp 2024-06-12 10:43:12.620210495 -0500 -+++ llvm-project/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp 2024-06-12 10:44:09.347614281 -0500 -@@ -12,7 +12,7 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp llvm-project-aso/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp +--- llvm-project-aso-orig/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp 2024-10-18 17:40:32.496992373 -0500 ++++ llvm-project-aso/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp 2024-11-23 20:39:47.180175366 -0600 +@@ -12,8 +12,8 @@ #include "DataSharingProcessor.h" -#include "Utils.h" + #include "flang/Lower/ConvertVariable.h" +#include "flang/Lower/OpenMP/Utils.h" #include "flang/Lower/PFTBuilder.h" #include "flang/Lower/SymbolMap.h" #include "flang/Optimizer/Builder/HLFIRTools.h" -@@ -47,20 +47,24 @@ +@@ -49,19 +49,24 @@ }); } @@ -4597,88 +5007,43 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/DataShari +} - privatize(clauseOps); -+void DataSharingProcessor::processStep2() { ++void DataSharingProcessor::processStep2( ++ mlir::omp::PrivateClauseOps *clauseOps) { + if (privatizationDone) + return; -+ privatize(); ++ privatize(clauseOps); insertBarrier(); + privatizationDone = true; } -void DataSharingProcessor::processStep2(mlir::Operation *op, bool isLoop) { -- // 'sections' lastprivate is handled by genOMP() +void DataSharingProcessor::processStep3(mlir::Operation *op, bool isLoop) { -+ // 'sections' lastprivate is handled by genOMP() + // 'sections' lastprivate is handled by genOMP() if (!mlir::isa(op)) { - insPt = firOpBuilder.saveInsertionPoint(); - copyLastPrivatize(op); -@@ -68,15 +72,12 @@ - } - - if (isLoop) { -- // push deallocs out of the loop -+ // push deallocs out of the loop - firOpBuilder.setInsertionPointAfter(op); - insertDeallocs(); - } else { -- // insert dummy instruction to mark the insertion position -- mlir::Value undefMarker = firOpBuilder.create( -- op->getLoc(), firOpBuilder.getIndexType()); -+ mlir::OpBuilder::InsertionGuard guard(firOpBuilder); - insertDeallocs(); -- firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp()); - } - } - -@@ -414,14 +415,15 @@ - preDeterminedSymbols); - } - --void DataSharingProcessor::privatize(mlir::omp::PrivateClauseOps *clauseOps) { -+void DataSharingProcessor::privatize() { - for (const semantics::Symbol *sym : allPrivatizedSymbols) { - if (const auto *commonDet = + mlir::OpBuilder::InsertionGuard guard(firOpBuilder); +@@ -424,8 +429,9 @@ sym->detailsIf()) { for (const auto &mem : commonDet->objects()) -- doPrivatize(&*mem, clauseOps); + doPrivatize(&*mem, clauseOps); - } else -- doPrivatize(sym, clauseOps); -+ doPrivatize(&*mem); + } else { -+ doPrivatize(sym); + doPrivatize(sym, clauseOps); + } } } -@@ 
-438,8 +440,7 @@ - } - } - --void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, -- mlir::omp::PrivateClauseOps *clauseOps) { -+void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym) { - if (!useDelayedPrivatization) { - cloneSymbol(sym); - copyFirstPrivateSymbol(sym); -@@ -539,10 +540,10 @@ - return result; - }(); - -- if (clauseOps) { -- clauseOps->privatizers.push_back(mlir::SymbolRefAttr::get(privatizerOp)); -- clauseOps->privateVars.push_back(hsb.getAddr()); -- } -+ privateClauseOps.privatizers.push_back( -+ mlir::SymbolRefAttr::get(privatizerOp)); -+ privateClauseOps.privateVars.push_back(hsb.getAddr()); -+ delayedPrivSyms.push_back(sym); - +@@ -553,7 +559,6 @@ + clauseOps->privateSyms.push_back(mlir::SymbolRefAttr::get(privatizerOp)); + clauseOps->privateVars.push_back(hsb.getAddr()); + } +- symToPrivatizer[sym] = privatizerOp; } -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/DataSharingProcessor.h llvm-project/flang/lib/Lower/OpenMP/DataSharingProcessor.h ---- llvm-project.orig/flang/lib/Lower/OpenMP/DataSharingProcessor.h 2024-06-12 10:43:12.620210495 -0500 -+++ llvm-project/flang/lib/Lower/OpenMP/DataSharingProcessor.h 2024-06-12 10:44:09.347614281 -0500 + +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Lower/OpenMP/DataSharingProcessor.h llvm-project-aso/flang/lib/Lower/OpenMP/DataSharingProcessor.h +--- llvm-project-aso-orig/flang/lib/Lower/OpenMP/DataSharingProcessor.h 2024-09-13 09:46:38.858303722 -0500 ++++ llvm-project-aso/flang/lib/Lower/OpenMP/DataSharingProcessor.h 2024-11-23 20:39:47.180175366 -0600 @@ -12,9 +12,9 @@ #ifndef FORTRAN_LOWER_DATASHARINGPROCESSOR_H #define FORTRAN_LOWER_DATASHARINGPROCESSOR_H @@ -4690,39 +5055,17 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/DataShari #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Parser/parse-tree.h" #include "flang/Semantics/symbol.h" -@@ -89,7 +89,11 @@ +@@ -87,7 +87,9 @@ bool shouldCollectPreDeterminedSymbols; bool useDelayedPrivatization; lower::SymMap *symTable; + OMPConstructSymbolVisitor visitor; -+ mlir::omp::PrivateClauseOps privateClauseOps; -+ llvm::SmallVector delayedPrivSyms; + bool privatizationDone = false; bool needBarrier(); void collectSymbols(semantics::Symbol::Flag flag, -@@ -105,15 +109,10 @@ - void collectDefaultSymbols(); - void collectImplicitSymbols(); - void collectPreDeterminedSymbols(); -- void privatize(mlir::omp::PrivateClauseOps *clauseOps); -- void defaultPrivatize( -- mlir::omp::PrivateClauseOps *clauseOps, -- llvm::SmallVectorImpl *privateSyms); -- void implicitPrivatize( -- mlir::omp::PrivateClauseOps *clauseOps, -- llvm::SmallVectorImpl *privateSyms); -- void doPrivatize(const semantics::Symbol *sym, -- mlir::omp::PrivateClauseOps *clauseOps); -+ void privatize(); -+ void defaultPrivatize(); -+ void implicitPrivatize(); -+ void doPrivatize(const semantics::Symbol *sym); - void copyLastPrivatize(mlir::Operation *op); - void insertLastPrivateCompare(mlir::Operation *op); - void cloneSymbol(const semantics::Symbol *sym); -@@ -133,19 +132,33 @@ +@@ -125,19 +127,33 @@ bool useDelayedPrivatization = false, lower::SymMap *symTable = nullptr); @@ -4762,29 +5105,14 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/DataShari + // before the operation is created since the bounds of the MLIR OpenMP + // operation can be privatised. 
+ void processStep1(); -+ void processStep2(); ++ void processStep2(mlir::omp::PrivateClauseOps *clauseOps = nullptr); + void processStep3(mlir::Operation *op, bool isLoop); - void setLoopIV(mlir::Value iv) { - assert(!loopIV && "Loop iteration variable already set"); -@@ -156,6 +169,14 @@ - getAllSymbolsToPrivatize() const { - return allPrivatizedSymbols; - } -+ -+ const mlir::omp::PrivateClauseOps &getPrivateClauseOps() const { -+ return privateClauseOps; -+ } -+ -+ llvm::ArrayRef getDelayedPrivSyms() const { -+ return delayedPrivSyms; -+ } - }; + void pushLoopIV(mlir::Value iv) { loopIVs.push_back(iv); } - } // namespace omp -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Decomposer.cpp llvm-project/flang/lib/Lower/OpenMP/Decomposer.cpp ---- llvm-project.orig/flang/lib/Lower/OpenMP/Decomposer.cpp 2024-06-12 10:43:12.620210495 -0500 -+++ llvm-project/flang/lib/Lower/OpenMP/Decomposer.cpp 2024-06-12 10:44:09.347614281 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Lower/OpenMP/Decomposer.cpp llvm-project-aso/flang/lib/Lower/OpenMP/Decomposer.cpp +--- llvm-project-aso-orig/flang/lib/Lower/OpenMP/Decomposer.cpp 2024-08-27 20:36:25.208173319 -0500 ++++ llvm-project-aso/flang/lib/Lower/OpenMP/Decomposer.cpp 2024-11-23 20:39:47.180175366 -0600 @@ -12,8 +12,8 @@ #include "Decomposer.h" @@ -4796,9 +5124,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Decompose #include "flang/Lower/PFTBuilder.h" #include "flang/Semantics/semantics.h" #include "flang/Tools/CrossToolHelpers.h" -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Decomposer.h llvm-project/flang/lib/Lower/OpenMP/Decomposer.h ---- llvm-project.orig/flang/lib/Lower/OpenMP/Decomposer.h 2024-06-12 10:43:12.620210495 -0500 -+++ llvm-project/flang/lib/Lower/OpenMP/Decomposer.h 2024-06-12 10:44:09.347614281 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Lower/OpenMP/Decomposer.h llvm-project-aso/flang/lib/Lower/OpenMP/Decomposer.h +--- llvm-project-aso-orig/flang/lib/Lower/OpenMP/Decomposer.h 2024-08-27 20:36:25.208173319 -0500 ++++ llvm-project-aso/flang/lib/Lower/OpenMP/Decomposer.h 2024-11-23 20:39:47.180175366 -0600 @@ -8,7 +8,7 @@ #ifndef FORTRAN_LOWER_OPENMP_DECOMPOSER_H #define FORTRAN_LOWER_OPENMP_DECOMPOSER_H @@ -4806,11 +5134,11 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Decompose -#include "Clauses.h" +#include "flang/Lower/OpenMP/Clauses.h" #include "mlir/IR/BuiltinOps.h" - #include "llvm/Frontend/OpenMP/ConstructCompositionT.h" #include "llvm/Frontend/OpenMP/ConstructDecompositionT.h" -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/OpenMP.cpp llvm-project/flang/lib/Lower/OpenMP/OpenMP.cpp ---- llvm-project.orig/flang/lib/Lower/OpenMP/OpenMP.cpp 2024-06-12 10:43:12.620210495 -0500 -+++ llvm-project/flang/lib/Lower/OpenMP/OpenMP.cpp 2024-06-12 10:44:09.351614239 -0500 + #include "llvm/Frontend/OpenMP/OMP.h" +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Lower/OpenMP/OpenMP.cpp llvm-project-aso/flang/lib/Lower/OpenMP/OpenMP.cpp +--- llvm-project-aso-orig/flang/lib/Lower/OpenMP/OpenMP.cpp 2024-11-23 20:25:26.839275178 -0600 ++++ llvm-project-aso/flang/lib/Lower/OpenMP/OpenMP.cpp 2024-11-23 20:39:47.180175366 -0600 @@ -13,16 +13,16 @@ #include "flang/Lower/OpenMP.h" @@ -4830,320 +5158,529 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/OpenMP.cp #include "flang/Lower/StatementContext.h" #include "flang/Lower/SymbolMap.h" 
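
// --- Illustrative sketch (not part of the patch): the DataSharingProcessor
// hunks above split privatization into three steps: processStep1() collects
// the symbols, processStep2() emits the privatizers (optionally filling the
// clause operands, and guarded by privatizationDone so it runs at most once),
// and processStep3() runs once the MLIR operation exists. The mock below is a
// self-contained analogue of that call sequence; it is not the flang API and
// all names are illustrative.
#include <iostream>
#include <string>
#include <vector>

struct PrivateClauseOps {              // stand-in for mlir::omp::PrivateClauseOps
  std::vector<std::string> privateVars;
};

class MockDataSharingProcessor {
  std::vector<std::string> symbols;
  bool privatizationDone = false;

public:
  // Step 1: gather the symbols that need privatization; no IR is touched yet.
  void processStep1() { symbols = {"a", "b"}; }

  // Step 2: may run before the operation is created, so the clause operands
  // being assembled can receive the privatized variables; the flag makes
  // repeated calls harmless.
  void processStep2(PrivateClauseOps *clauseOps = nullptr) {
    if (privatizationDone)
      return;
    if (clauseOps)
      clauseOps->privateVars = symbols;
    privatizationDone = true;
  }

  // Step 3: runs after the operation exists (e.g. lastprivate copies and
  // deallocations are placed relative to it).
  void processStep3(const std::string &opName, bool isLoop) {
    std::cout << "finalize " << opName << (isLoop ? " (loop)" : "") << '\n';
  }
};

int main() {
  MockDataSharingProcessor dsp;
  PrivateClauseOps clauseOps;
  dsp.processStep1();           // before the operands are assembled
  dsp.processStep2(&clauseOps); // while building the operation's operands
  // ... the omp operation would be created from clauseOps here ...
  dsp.processStep3("omp.wsloop", /*isLoop=*/true); // after the op exists
}
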
#include "flang/Optimizer/Builder/BoxValue.h" -@@ -45,12 +45,25 @@ +@@ -46,6 +46,19 @@ // Code generation helper functions //===----------------------------------------------------------------------===// -+static mlir::omp::TargetOp findParentTargetOp(mlir::OpBuilder &builder) { -+ mlir::Operation *parentOp = builder.getBlock()->getParentOp(); -+ if (!parentOp) -+ return nullptr; ++static void genOMPDispatch(lower::AbstractConverter &converter, ++ lower::SymMap &symTable, ++ semantics::SemanticsContext &semaCtx, ++ lower::pft::Evaluation &eval, mlir::Location loc, ++ const ConstructQueue &queue, ++ ConstructQueue::const_iterator item); ++ ++static void processHostEvalClauses(lower::AbstractConverter &converter, ++ semantics::SemanticsContext &semaCtx, ++ lower::StatementContext &stmtCtx, ++ lower::pft::Evaluation &eval, ++ mlir::Location loc); ++ + namespace { + /// Structure holding the information needed to create and bind entry block + /// arguments associated to a single clause. +@@ -64,6 +77,7 @@ + /// Structure holding the information needed to create and bind entry block + /// arguments associated to all clauses that can define them. + struct EntryBlockArgs { ++ llvm::ArrayRef hostEvalVars; + EntryBlockArgsEntry inReduction; + EntryBlockArgsEntry map; + EntryBlockArgsEntry priv; +@@ -86,18 +100,146 @@ + + auto getVars() const { + return llvm::concat( +- inReduction.vars, map.vars, priv.vars, reduction.vars, ++ hostEvalVars, inReduction.vars, map.vars, priv.vars, reduction.vars, + taskReduction.vars, useDeviceAddr.vars, useDevicePtr.vars); + } + }; + -+ auto targetOp = llvm::dyn_cast(parentOp); -+ if (!targetOp) -+ targetOp = parentOp->getParentOfType(); ++/// Structure holding information that is needed to pass host-evaluated ++/// information to later lowering stages. ++class HostEvalInfo { ++public: ++ // Allow this function access to private members in order to initialize them. ++ friend void ::processHostEvalClauses(lower::AbstractConverter &, ++ semantics::SemanticsContext &, ++ lower::StatementContext &, ++ lower::pft::Evaluation &, ++ mlir::Location); ++ ++ /// Fill \c vars with values stored in \c ops. ++ /// ++ /// The order in which values are stored matches the one expected by \see ++ /// bindOperands(). ++ void collectValues(llvm::SmallVectorImpl &vars) const { ++ vars.append(ops.loopLowerBounds); ++ vars.append(ops.loopUpperBounds); ++ vars.append(ops.loopSteps); + -+ return targetOp; -+} ++ if (ops.numTeamsLower) ++ vars.push_back(ops.numTeamsLower); + - static void genOMPDispatch(lower::AbstractConverter &converter, - lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, - lower::pft::Evaluation &eval, mlir::Location loc, - const ConstructQueue &queue, -- ConstructQueue::iterator item); -+ ConstructQueue::iterator item, -+ DataSharingProcessor *dsp = nullptr); - - static lower::pft::Evaluation * - getCollapsedLoopEval(lower::pft::Evaluation &eval, int collapseValue) { -@@ -79,6 +92,75 @@ - converter.genEval(e); - } - -+//===----------------------------------------------------------------------===// -+// HostClausesInsertionGuard -+//===----------------------------------------------------------------------===// ++ if (ops.numTeamsUpper) ++ vars.push_back(ops.numTeamsUpper); + -+/// If the insertion point of the builder is located inside of an omp.target -+/// region, this RAII guard moves the insertion point to just before that -+/// omp.target operation and then restores the original insertion point when -+/// destroyed. 
If not currently inserting inside an omp.target, it remains -+/// unchanged. -+class HostClausesInsertionGuard { -+public: -+ HostClausesInsertionGuard(mlir::OpBuilder &builder) : builder(builder) { -+ targetOp = findParentTargetOp(builder); -+ if (targetOp) { -+ ip = builder.saveInsertionPoint(); -+ builder.setInsertionPoint(targetOp); -+ } ++ if (ops.numThreads) ++ vars.push_back(ops.numThreads); ++ ++ if (ops.threadLimit) ++ vars.push_back(ops.threadLimit); + } + -+ ~HostClausesInsertionGuard() { -+ if (ip.isSet()) { -+ fixupExtractedHostOps(); -+ builder.restoreInsertionPoint(ip); -+ } ++ /// Update \c ops, replacing all values with the corresponding block argument ++ /// in \c args. ++ /// ++ /// The order in which values are stored in \c args is the same as the one ++ /// used by \see collectValues(). ++ void bindOperands(llvm::ArrayRef args) { ++ assert(args.size() == ++ ops.loopLowerBounds.size() + ops.loopUpperBounds.size() + ++ ops.loopSteps.size() + (ops.numTeamsLower ? 1 : 0) + ++ (ops.numTeamsUpper ? 1 : 0) + (ops.numThreads ? 1 : 0) + ++ (ops.threadLimit ? 1 : 0) && ++ "invalid block argument list"); ++ int argIndex = 0; ++ for (size_t i = 0; i < ops.loopLowerBounds.size(); ++i) ++ ops.loopLowerBounds[i] = args[argIndex++]; ++ ++ for (size_t i = 0; i < ops.loopUpperBounds.size(); ++i) ++ ops.loopUpperBounds[i] = args[argIndex++]; ++ ++ for (size_t i = 0; i < ops.loopSteps.size(); ++i) ++ ops.loopSteps[i] = args[argIndex++]; ++ ++ if (ops.numTeamsLower) ++ ops.numTeamsLower = args[argIndex++]; ++ ++ if (ops.numTeamsUpper) ++ ops.numTeamsUpper = args[argIndex++]; ++ ++ if (ops.numThreads) ++ ops.numThreads = args[argIndex++]; ++ ++ if (ops.threadLimit) ++ ops.threadLimit = args[argIndex++]; + } + -+private: -+ mlir::OpBuilder &builder; -+ mlir::OpBuilder::InsertPoint ip; -+ mlir::omp::TargetOp targetOp; -+ -+ /// Fixup any uses of target region block arguments that we have just created -+ /// outside of the target region, and replace them by their host values. -+ void fixupExtractedHostOps() { -+ auto useOutsideTargetRegion = [](mlir::OpOperand &operand) { -+ if (mlir::Operation *owner = operand.getOwner()) -+ return !owner->getParentOfType(); ++ /// Update \p clauseOps and \p ivOut with the corresponding host-evaluated ++ /// values and Fortran symbols, respectively, if they have already been ++ /// initialized but not yet applied. ++ /// ++ /// \returns whether an update was performed. If not, these clauses were not ++ /// evaluated in the host device. ++ bool apply(mlir::omp::LoopNestOperands &clauseOps, ++ llvm::SmallVectorImpl &ivOut) { ++ if (iv.empty() || loopNestApplied) { ++ loopNestApplied = true; + return false; -+ }; ++ } + -+ mlir::OperandRange map = targetOp.getMapOperands(); -+ for (mlir::BlockArgument arg : targetOp.getRegion().getArguments()) { -+ mlir::Value hostVal = map[arg.getArgNumber()] -+ .getDefiningOp() -+ .getVarPtr(); -+ -+ // Replace instances of omp.target block arguments used outside with their -+ // corresponding host value. -+ arg.replaceUsesWithIf(hostVal, [&](mlir::OpOperand &operand) -> bool { -+ // If the use is an hlfir.declare, we need to search for the matching -+ // one within host code. 
-+ if (auto declareOp = llvm::dyn_cast_if_present( -+ operand.getOwner())) { -+ if (auto hostDeclareOp = hostVal.getDefiningOp()) { -+ declareOp->replaceUsesWithIf(hostDeclareOp.getResults(), -+ useOutsideTargetRegion); -+ } else if (auto hostBoxOp = hostVal.getDefiningOp()) { -+ declareOp->replaceUsesWithIf(hostBoxOp.getVal() -+ .getDefiningOp() -+ .getResults(), -+ useOutsideTargetRegion); -+ } -+ } -+ return useOutsideTargetRegion(operand); -+ }); ++ loopNestApplied = true; ++ clauseOps.loopLowerBounds = ops.loopLowerBounds; ++ clauseOps.loopUpperBounds = ops.loopUpperBounds; ++ clauseOps.loopSteps = ops.loopSteps; ++ ivOut.append(iv); ++ return true; ++ } ++ ++ /// Update \p clauseOps with the corresponding host-evaluated values if they ++ /// have already been initialized but not yet applied. ++ /// ++ /// \returns whether an update was performed. If not, these clauses were not ++ /// evaluated in the host device. ++ bool apply(mlir::omp::ParallelOperands &clauseOps) { ++ if (!ops.numThreads || parallelApplied) { ++ parallelApplied = true; ++ return false; + } ++ ++ parallelApplied = true; ++ clauseOps.numThreads = ops.numThreads; ++ return true; + } -+}; + - static fir::GlobalOp globalInitialization(lower::AbstractConverter &converter, - fir::FirOpBuilder &firOpBuilder, - const semantics::Symbol &sym, -@@ -226,6 +308,27 @@ - return storeOp; - } - -+static bool evalHasSiblings(lower::pft::Evaluation &eval) { -+ return eval.parent.visit(common::visitors{ -+ [&](const lower::pft::Program &parent) { -+ return parent.getUnits().size() + parent.getCommonBlocks().size() > 1; -+ }, -+ [&](const lower::pft::Evaluation &parent) { -+ for (auto &sibling : *parent.evaluationList) -+ if (&sibling != &eval && !sibling.isEndStmt()) -+ return true; -+ -+ return false; -+ }, -+ [&](const auto &parent) { -+ for (auto &sibling : parent.evaluationList) -+ if (&sibling != &eval && !sibling.isEndStmt()) -+ return true; -+ -+ return false; -+ }}); -+} ++ /// Update \p clauseOps with the corresponding host-evaluated values if they ++ /// have already been initialized. ++ /// ++ /// \returns whether an update was performed. If not, these clauses were not ++ /// evaluated in the host device. ++ bool apply(mlir::omp::TeamsOperands &clauseOps) { ++ if (!ops.numTeamsLower && !ops.numTeamsUpper && !ops.threadLimit) ++ return false; + - // This helper function implements the functionality of "promoting" - // non-CPTR arguments of use_device_ptr to use_device_addr - // arguments (automagic conversion of use_device_ptr -> -@@ -414,24 +517,6 @@ - } - - static void --genReductionVars(mlir::Operation *op, lower::AbstractConverter &converter, -- mlir::Location &loc, -- llvm::ArrayRef reductionArgs, -- llvm::ArrayRef reductionTypes) { -- fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); -- llvm::SmallVector blockArgLocs(reductionArgs.size(), loc); -- -- mlir::Block *entryBlock = firOpBuilder.createBlock( -- &op->getRegion(0), {}, reductionTypes, blockArgLocs); -- -- // Bind the reduction arguments to their block arguments. 
-- for (auto [arg, prv] : -- llvm::zip_equal(reductionArgs, entryBlock->getArguments())) { -- converter.bindSymbol(*arg, prv); -- } --} -- --static void - markDeclareTarget(mlir::Operation *op, lower::AbstractConverter &converter, - mlir::omp::DeclareTargetCaptureClause captureClause, - mlir::omp::DeclareTargetDeviceType deviceType) { -@@ -502,11 +587,6 @@ - : converter(converter), symTable(symTable), semaCtx(semaCtx), loc(loc), - eval(eval), dir(dir) {} - -- OpWithBodyGenInfo &setOuterCombined(bool value) { -- outerCombined = value; -- return *this; -- } -- - OpWithBodyGenInfo &setClauses(const List *value) { - clauses = value; - return *this; -@@ -518,8 +598,8 @@ - } - - OpWithBodyGenInfo & -- setReductions(llvm::SmallVectorImpl *value1, -- llvm::SmallVectorImpl *value2) { -+ setReductions(llvm::ArrayRef *value1, -+ llvm::ArrayRef *value2) { - reductionSymbols = value1; - reductionTypes = value2; - return *this; -@@ -542,16 +622,14 @@ - lower::pft::Evaluation &eval; - /// [in] leaf directive for which to generate the op body. - llvm::omp::Directive dir; -- /// [in] is this an outer operation - prevents privatization. -- bool outerCombined = false; - /// [in] list of clauses to process. - const List *clauses = nullptr; - /// [in] if provided, processes the construct's data-sharing attributes. - DataSharingProcessor *dsp = nullptr; - /// [in] if provided, list of reduction symbols -- llvm::SmallVectorImpl *reductionSymbols = nullptr; -+ llvm::ArrayRef *reductionSymbols = nullptr; - /// [in] if provided, list of reduction types -- llvm::SmallVectorImpl *reductionTypes = nullptr; -+ llvm::ArrayRef *reductionTypes = nullptr; - /// [in] if provided, emits the op's region entry. Otherwise, an emtpy block - /// is created in the region. - GenOMPRegionEntryCBFn genRegionEntryCB = nullptr; -@@ -568,12 +646,6 @@ - ConstructQueue::iterator item) { - fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder(); - -- auto insertMarker = [](fir::FirOpBuilder &builder) { -- mlir::Value undef = builder.create(builder.getUnknownLoc(), -- builder.getIndexType()); -- return undef.getDefiningOp(); -- }; -- - // If an argument for the region is provided then create the block with that - // argument. Also update the symbol's address with the mlir argument value. - // e.g. For loops the argument is the induction variable. And all further -@@ -586,12 +658,13 @@ - firOpBuilder.createBlock(&op.getRegion(0)); - return {}; - }(); ++ clauseOps.numTeamsLower = ops.numTeamsLower; ++ clauseOps.numTeamsUpper = ops.numTeamsUpper; ++ clauseOps.threadLimit = ops.threadLimit; ++ return true; ++ } + - // Mark the earliest insertion point. -- mlir::Operation *marker = insertMarker(firOpBuilder); -+ auto marker = firOpBuilder.saveInsertionPoint(); - - // If it is an unstructured region and is not the outer region of a combined - // construct, create empty blocks for all evaluations. -- if (info.eval.lowerAsUnstructured() && !info.outerCombined) -+ if (info.eval.lowerAsUnstructured()) - lower::createEmptyRegionBlocks( - firOpBuilder, info.eval.getNestedEvaluations()); - -@@ -599,22 +672,23 @@ - // code will use the right symbols. 
- bool isLoop = llvm::omp::getDirectiveAssociation(info.dir) == - llvm::omp::Association::Loop; -- bool privatize = info.clauses && !info.outerCombined; -+ bool privatize = info.clauses; - -- firOpBuilder.setInsertionPoint(marker); -+ firOpBuilder.restoreInsertionPoint(marker); - std::optional tempDsp; - if (privatize) { - if (!info.dsp) { - tempDsp.emplace(info.converter, info.semaCtx, *info.clauses, info.eval, - Fortran::lower::omp::isLastItemInQueue(item, queue)); - tempDsp->processStep1(); -+ tempDsp->processStep2(); - } - } - - if (info.dir == llvm::omp::Directive::OMPD_parallel) { - threadPrivatizeVars(info.converter, info.eval); - if (info.clauses) { -- firOpBuilder.setInsertionPoint(marker); -+ firOpBuilder.restoreInsertionPoint(marker); - ClauseProcessor(info.converter, info.semaCtx, *info.clauses) - .processCopyin(); - } -@@ -622,7 +696,7 @@ ++private: ++ mlir::omp::HostEvaluatedOperands ops; ++ llvm::SmallVector iv; ++ bool loopNestApplied = false, parallelApplied = false; ++}; + } // namespace - if (ConstructQueue::iterator next = std::next(item); next != queue.end()) { - genOMPDispatch(info.converter, info.symTable, info.semaCtx, info.eval, -- info.loc, queue, next); -+ info.loc, queue, next, info.dsp); - } else { - // genFIR(Evaluation&) tries to patch up unterminated blocks, causing - // a lot of complications for our approach if the terminator generation -@@ -630,7 +704,7 @@ - // delete it. - firOpBuilder.setInsertionPointToEnd(&op.getRegion(0).back()); - auto *temp = lower::genOpenMPTerminator(firOpBuilder, &op, info.loc); -- firOpBuilder.setInsertionPointAfter(marker); -+ firOpBuilder.restoreInsertionPoint(marker); - genNestedEvaluations(info.converter, info.eval); - temp->erase(); - } -@@ -695,17 +769,17 @@ +-static void genOMPDispatch(lower::AbstractConverter &converter, +- lower::SymMap &symTable, +- semantics::SemanticsContext &semaCtx, +- lower::pft::Evaluation &eval, mlir::Location loc, +- const ConstructQueue &queue, +- ConstructQueue::const_iterator item); ++/// Stack of \see HostEvalInfo to represent the current nest of \c omp.target ++/// operations being created. ++/// ++/// The current implementation prevents nested 'target' regions from breaking ++/// the handling of the outer region by keeping a stack of information ++/// structures, but it will probably still require some further work to support ++/// reverse offloading. ++static llvm::SmallVector hostEvalInfo; - if (!info.dsp) { - assert(tempDsp.has_value()); -- tempDsp->processStep2(privatizationTopLevelOp, isLoop); -+ tempDsp->processStep3(privatizationTopLevelOp, isLoop); - } else { - if (isLoop && regionArgs.size() > 0) - info.dsp->setLoopIV(info.converter.getSymbolAddress(*regionArgs[0])); -- info.dsp->processStep2(privatizationTopLevelOp, isLoop); -+ info.dsp->processStep3(privatizationTopLevelOp, isLoop); - } - } - } + /// Bind symbols to their corresponding entry block arguments. + /// +@@ -220,6 +362,8 @@ + }; -- firOpBuilder.setInsertionPointAfter(marker); -- marker->erase(); -+ firOpBuilder.setInsertionPoint(marker.getBlock(), -+ std::prev(marker.getPoint())); + // Process in clause name alphabetical order to match block arguments order. ++ // Do not bind host_eval variables because they cannot be used inside of the ++ // corresponding region, except for very specific cases handled separately. 
+ bindPrivateLike(args.inReduction.syms, args.inReduction.vars, + op.getInReductionBlockArgs()); + bindMapLike(args.map.syms, op.getMapBlockArgs()); +@@ -257,6 +401,246 @@ + }); } - static void genBodyOfTargetDataOp( -@@ -808,6 +882,9 @@ - auto *regionBlock = firOpBuilder.createBlock(®ion, {}, allRegionArgTypes, - allRegionArgLocs); - -+ if (!enableDelayedPrivatizationStaging) -+ dsp.processStep2(); ++/// Get the directive enumeration value corresponding to the given OpenMP ++/// construct PFT node. ++llvm::omp::Directive ++extractOmpDirective(const parser::OpenMPConstruct &ompConstruct) { ++ return common::visit( ++ common::visitors{ ++ [](const parser::OpenMPAllocatorsConstruct &c) { ++ return llvm::omp::OMPD_allocators; ++ }, ++ [](const parser::OpenMPAtomicConstruct &c) { ++ return llvm::omp::OMPD_atomic; ++ }, ++ [](const parser::OpenMPBlockConstruct &c) { ++ return std::get( ++ std::get(c.t).t) ++ .v; ++ }, ++ [](const parser::OpenMPCriticalConstruct &c) { ++ return llvm::omp::OMPD_critical; ++ }, ++ [](const parser::OpenMPDeclarativeAllocate &c) { ++ return llvm::omp::OMPD_allocate; ++ }, ++ [](const parser::OpenMPExecutableAllocate &c) { ++ return llvm::omp::OMPD_allocate; ++ }, ++ [](const parser::OpenMPLoopConstruct &c) { ++ return std::get( ++ std::get(c.t).t) ++ .v; ++ }, ++ [](const parser::OpenMPSectionConstruct &c) { ++ return llvm::omp::OMPD_section; ++ }, ++ [](const parser::OpenMPSectionsConstruct &c) { ++ return std::get( ++ std::get(c.t).t) ++ .v; ++ }, ++ [](const parser::OpenMPStandaloneConstruct &c) { ++ return common::visit( ++ common::visitors{ ++ [](const parser::OpenMPSimpleStandaloneConstruct &c) { ++ return std::get(c.t) ++ .v; ++ }, ++ [](const parser::OpenMPFlushConstruct &c) { ++ return llvm::omp::OMPD_flush; ++ }, ++ [](const parser::OpenMPCancelConstruct &c) { ++ return llvm::omp::OMPD_cancel; ++ }, ++ [](const parser::OpenMPCancellationPointConstruct &c) { ++ return llvm::omp::OMPD_cancellation_point; ++ }, ++ [](const parser::OpenMPDepobjConstruct &c) { ++ return llvm::omp::OMPD_depobj; ++ }}, ++ c.u); ++ }}, ++ ompConstruct.u); ++} + - // Clones the `bounds` placing them inside the target region and returns them. - auto cloneBound = [&](mlir::Value bound) { - if (mlir::isMemoryEffectFree(bound.getDefiningOp())) { -@@ -871,7 +948,7 @@ - } ++/// Populate the global \see hostEvalInfo after processing clauses for the given ++/// \p eval OpenMP target construct, or nested constructs, if these must be ++/// evaluated outside of the target region per the spec. ++/// ++/// In particular, this will ensure that in 'target teams' and equivalent nested ++/// constructs, the \c thread_limit and \c num_teams clauses will be evaluated ++/// in the host. Additionally, loop bounds, steps and the \c num_threads clause ++/// will also be evaluated in the host if a target SPMD construct is detected ++/// (i.e. 'target teams distribute parallel do [simd]' or equivalent nesting). ++/// ++/// The result, stored as a global, is intended to be used to populate the \c ++/// host_eval operands of the associated \c omp.target operation, and also to be ++/// checked and used by later lowering steps to populate the corresponding ++/// operands of the \c omp.teams, \c omp.parallel or \c omp.loop_nest ++/// operations. 
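
// --- Illustrative sketch (not part of the patch): extractOmpDirective above
// maps every alternative held by the OpenMP construct parse-tree node to its
// llvm::omp::Directive value by visiting the variant. The standalone analogue
// below shows the same pattern with std::variant/std::visit; the small structs
// and the enum are assumptions for illustration, not the flang parse-tree
// types.
#include <iostream>
#include <type_traits>
#include <variant>

enum class Directive { Atomic, Critical, Loop, Sections };

struct AtomicConstruct {};
struct CriticalConstruct {};
struct LoopConstruct { Directive begin; };     // carries its begin directive
struct SectionsConstruct { Directive begin; };

using OpenMPConstruct =
    std::variant<AtomicConstruct, CriticalConstruct, LoopConstruct,
                 SectionsConstruct>;

Directive extractDirective(const OpenMPConstruct &construct) {
  return std::visit(
      [](const auto &c) -> Directive {
        using T = std::decay_t<decltype(c)>;
        if constexpr (std::is_same_v<T, AtomicConstruct>)
          return Directive::Atomic;
        else if constexpr (std::is_same_v<T, CriticalConstruct>)
          return Directive::Critical;
        else
          return c.begin; // loop/sections constructs store their directive
      },
      construct);
}

int main() {
  OpenMPConstruct c = LoopConstruct{Directive::Loop};
  std::cout << static_cast<int>(extractDirective(c)) << '\n'; // prints: 2
}
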
++static void processHostEvalClauses(lower::AbstractConverter &converter, ++ semantics::SemanticsContext &semaCtx, ++ lower::StatementContext &stmtCtx, ++ lower::pft::Evaluation &eval, ++ mlir::Location loc) { ++ // Obtain the list of clauses of the given OpenMP block or loop construct ++ // evaluation. Other evaluations passed to this lambda keep `clauses` ++ // unchanged. ++ auto extractClauses = [&semaCtx](lower::pft::Evaluation &eval, ++ List &clauses) { ++ const auto *ompEval = eval.getIf(); ++ if (!ompEval) ++ return; ++ ++ const parser::OmpClauseList *beginClauseList = nullptr; ++ const parser::OmpClauseList *endClauseList = nullptr; ++ common::visit( ++ common::visitors{ ++ [&](const parser::OpenMPBlockConstruct &ompConstruct) { ++ const auto &beginDirective = ++ std::get(ompConstruct.t); ++ beginClauseList = ++ &std::get(beginDirective.t); ++ endClauseList = &std::get( ++ std::get(ompConstruct.t).t); ++ }, ++ [&](const parser::OpenMPLoopConstruct &ompConstruct) { ++ const auto &beginDirective = ++ std::get(ompConstruct.t); ++ beginClauseList = ++ &std::get(beginDirective.t); ++ ++ if (auto &endDirective = ++ std::get>( ++ ompConstruct.t)) ++ endClauseList = ++ &std::get(endDirective->t); ++ }, ++ [&](const auto &) {}}, ++ ompEval->u); ++ ++ assert(beginClauseList && "expected begin directive"); ++ clauses.append(makeClauses(*beginClauseList, semaCtx)); ++ ++ if (endClauseList) ++ clauses.append(makeClauses(*endClauseList, semaCtx)); ++ }; ++ ++ // Return the directive that is immediately nested inside of the given ++ // `parent` evaluation, if it is its only non-end-statement nested evaluation ++ // and it represents an OpenMP construct. ++ auto extractOnlyOmpNestedDir = [](lower::pft::Evaluation &parent) ++ -> std::optional { ++ if (!parent.hasNestedEvaluations()) ++ return std::nullopt; ++ ++ llvm::omp::Directive dir; ++ auto &nested = parent.getFirstNestedEvaluation(); ++ if (const auto *ompEval = nested.getIf()) ++ dir = extractOmpDirective(*ompEval); ++ else ++ return std::nullopt; ++ ++ for (auto &sibling : parent.getNestedEvaluations()) ++ if (&sibling != &nested && !sibling.isEndStmt()) ++ return std::nullopt; ++ ++ return dir; ++ }; ++ ++ // Process the given evaluation assuming it's part of a 'target' construct or ++ // captured by one, and store results in the global `hostEvalInfo`. ++ std::function &)> ++ processEval; ++ processEval = [&](lower::pft::Evaluation &eval, const List &clauses) { ++ using namespace llvm::omp; ++ ClauseProcessor cp(converter, semaCtx, clauses); ++ ++ // Call `processEval` recursively with the immediately nested evaluation and ++ // its corresponding clauses if there is a single nested evaluation ++ // representing an OpenMP directive that passes the given test. ++ auto processSingleNestedIf = [&](llvm::function_ref test) { ++ std::optional nestedDir = extractOnlyOmpNestedDir(eval); ++ if (!nestedDir || !test(*nestedDir)) ++ return; ++ ++ lower::pft::Evaluation &nestedEval = eval.getFirstNestedEvaluation(); ++ List nestedClauses; ++ extractClauses(nestedEval, nestedClauses); ++ processEval(nestedEval, nestedClauses); ++ }; ++ ++ const auto *ompEval = eval.getIf(); ++ if (!ompEval) ++ return; ++ ++ HostEvalInfo &hostInfo = hostEvalInfo.back(); ++ ++ switch (extractOmpDirective(*ompEval)) { ++ // Cases where 'teams' and target SPMD clauses might be present. 
++ case OMPD_teams_distribute_parallel_do: ++ case OMPD_teams_distribute_parallel_do_simd: ++ cp.processThreadLimit(stmtCtx, hostInfo.ops); ++ [[fallthrough]]; ++ case OMPD_target_teams_distribute_parallel_do: ++ case OMPD_target_teams_distribute_parallel_do_simd: ++ cp.processNumTeams(stmtCtx, hostInfo.ops); ++ [[fallthrough]]; ++ case OMPD_distribute_parallel_do: ++ case OMPD_distribute_parallel_do_simd: ++ cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv); ++ cp.processNumThreads(stmtCtx, hostInfo.ops); ++ break; ++ ++ // Cases where 'teams' clauses might be present, and target SPMD is ++ // possible by looking at nested evaluations. ++ case OMPD_teams: ++ cp.processThreadLimit(stmtCtx, hostInfo.ops); ++ [[fallthrough]]; ++ case OMPD_target_teams: ++ cp.processNumTeams(stmtCtx, hostInfo.ops); ++ processSingleNestedIf([](Directive nestedDir) { ++ return nestedDir == OMPD_distribute_parallel_do || ++ nestedDir == OMPD_distribute_parallel_do_simd; ++ }); ++ break; ++ ++ // Cases where only 'teams' host-evaluated clauses might be present. ++ case OMPD_teams_distribute: ++ case OMPD_teams_distribute_simd: ++ cp.processThreadLimit(stmtCtx, hostInfo.ops); ++ [[fallthrough]]; ++ case OMPD_target_teams_distribute: ++ case OMPD_target_teams_distribute_simd: ++ cp.processNumTeams(stmtCtx, hostInfo.ops); ++ break; ++ ++ // Standalone 'target' case. ++ case OMPD_target: { ++ processSingleNestedIf( ++ [](Directive nestedDir) { return topTeamsSet.test(nestedDir); }); ++ break; ++ } ++ default: ++ break; ++ } ++ }; ++ ++ assert(!hostEvalInfo.empty() && "expected HOST_EVAL info structure"); ++ ++ const auto *ompEval = eval.getIf(); ++ assert(ompEval && ++ llvm::omp::allTargetSet.test(extractOmpDirective(*ompEval)) && ++ "expected TARGET construct evaluation"); ++ ++ // Use the whole list of clauses passed to the construct here, rather than the ++ // ones only applied to omp.target. ++ List clauses; ++ extractClauses(eval, clauses); ++ processEval(eval, clauses); ++} ++ + static lower::pft::Evaluation * + getCollapsedLoopEval(lower::pft::Evaluation &eval, int collapseValue) { + // Return the Evaluation of the innermost collapsed loop, or the current one +@@ -639,11 +1023,11 @@ + + llvm::SmallVector types; + llvm::SmallVector locs; +- unsigned numVars = args.inReduction.vars.size() + args.map.vars.size() + +- args.priv.vars.size() + args.reduction.vars.size() + +- args.taskReduction.vars.size() + +- args.useDeviceAddr.vars.size() + +- args.useDevicePtr.vars.size(); ++ unsigned numVars = ++ args.hostEvalVars.size() + args.inReduction.vars.size() + ++ args.map.vars.size() + args.priv.vars.size() + ++ args.reduction.vars.size() + args.taskReduction.vars.size() + ++ args.useDeviceAddr.vars.size() + args.useDevicePtr.vars.size(); + types.reserve(numVars); + locs.reserve(numVars); + +@@ -656,6 +1040,7 @@ + + // Populate block arguments in clause name alphabetical order to match + // expected order by the BlockArgOpenMPOpInterface. ++ extractTypeLoc(args.hostEvalVars); + extractTypeLoc(args.inReduction.vars); + extractTypeLoc(args.map.vars); + extractTypeLoc(args.priv.vars); +@@ -784,6 +1169,7 @@ + firOpBuilder.createBlock(&op.getRegion(0)); + return {}; + }(); ++ + // Mark the earliest insertion point. 
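
// --- Illustrative sketch (not part of the patch): the processHostEvalClauses
// switch earlier in this hunk uses [[fallthrough]] so that each combined
// construct accumulates the clauses evaluated on the host: 'teams distribute
// parallel do' picks up thread_limit, then num_teams, then the loop bounds and
// num_threads. The standalone analogue below reproduces only that accumulation
// pattern; the enum and flag names are illustrative and the clause sets are
// abbreviated.
#include <iostream>

enum class Dir {
  TeamsDistributeParallelDo,
  TargetTeamsDistributeParallelDo,
  DistributeParallelDo,
};

struct HostEvaluated {
  bool threadLimit = false;
  bool numTeams = false;
  bool numThreads = false;
  bool loopBounds = false;
};

HostEvaluated clausesEvaluatedOnHost(Dir dir) {
  HostEvaluated h;
  switch (dir) {
  case Dir::TeamsDistributeParallelDo:
    h.threadLimit = true;
    [[fallthrough]];
  case Dir::TargetTeamsDistributeParallelDo:
    h.numTeams = true;
    [[fallthrough]];
  case Dir::DistributeParallelDo:
    h.loopBounds = true; // collapse: lower/upper bounds and steps
    h.numThreads = true;
    break;
  }
  return h;
}

int main() {
  HostEvaluated h = clausesEvaluatedOnHost(Dir::TeamsDistributeParallelDo);
  std::cout << h.threadLimit << h.numTeams << h.numThreads << h.loopBounds
            << '\n'; // prints: 1111
}
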
+ mlir::Operation *marker = insertMarker(firOpBuilder); + +@@ -806,6 +1192,7 @@ + tempDsp.emplace(info.converter, info.semaCtx, *info.clauses, info.eval, + Fortran::lower::omp::isLastItemInQueue(item, queue)); + tempDsp->processStep1(); ++ tempDsp->processStep2(); + } + + if (info.dir == llvm::omp::Directive::OMPD_parallel) { +@@ -895,14 +1282,14 @@ - for (auto [argIndex, argSymbol] : -- llvm::enumerate(dsp.getAllSymbolsToPrivatize())) { -+ llvm::enumerate(dsp.getDelayedPrivSyms())) { - argIndex = mapSyms.size() + argIndex; + if (!info.dsp) { + assert(tempDsp.has_value()); +- tempDsp->processStep2(privatizationTopLevelOp, isLoop); ++ tempDsp->processStep3(privatizationTopLevelOp, isLoop); + } else { + if (isLoop && regionArgs.size() > 0) { + for (const auto ®ionArg : regionArgs) { + info.dsp->pushLoopIV(info.converter.getSymbolAddress(*regionArg)); + } + } +- info.dsp->processStep2(privatizationTopLevelOp, isLoop); ++ info.dsp->processStep3(privatizationTopLevelOp, isLoop); + } + } + } +@@ -997,7 +1384,13 @@ - const mlir::BlockArgument &arg = region.getArgument(argIndex); -@@ -962,7 +1039,7 @@ + mlir::Region ®ion = targetOp.getRegion(); + mlir::Block *entryBlock = genEntryBlock(converter, args, region); ++ ++ if (!enableDelayedPrivatizationStaging) ++ dsp.processStep2(); ++ + bindEntryBlockArgs(converter, targetOp, args); ++ if (!hostEvalInfo.empty()) ++ hostEvalInfo.back().bindOperands(argIface.getHostEvalBlockArgs()); + + // Check if cloning the bounds introduced any dependency on the outer region. + // If so, then either clone them as well if they are MemoryEffectFree, or else +@@ -1008,9 +1401,11 @@ + while (!valuesDefinedAbove.empty()) { + for (mlir::Value val : valuesDefinedAbove) { + mlir::Operation *valOp = val.getDefiningOp(); ++ assert(valOp != nullptr); + if (mlir::isMemoryEffectFree(valOp)) { + mlir::Operation *clonedOp = valOp->clone(); + entryBlock->push_front(clonedOp); ++ assert(clonedOp->getNumResults() == 1); + val.replaceUsesWithIf(clonedOp->getResult(0), + [entryBlock](mlir::OpOperand &use) { + return use.getOwner()->getBlock() == entryBlock; +@@ -1096,7 +1491,7 @@ genNestedEvaluations(converter, eval); } @@ -5152,70 +5689,72 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/OpenMP.cp } template -@@ -1026,20 +1103,28 @@ - static void genParallelClauses( - lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, - lower::StatementContext &stmtCtx, const List &clauses, -- mlir::Location loc, bool processReduction, -+ mlir::Location loc, bool evalOutsideTarget, - mlir::omp::ParallelClauseOps &clauseOps, -+ mlir::omp::NumThreadsClauseOps &numThreadsClauseOps, - llvm::SmallVectorImpl &reductionTypes, - llvm::SmallVectorImpl &reductionSyms) { +@@ -1173,7 +1568,10 @@ + mlir::Location loc, mlir::omp::LoopNestOperands &clauseOps, + llvm::SmallVectorImpl &iv) { + ClauseProcessor cp(converter, semaCtx, clauses); +- cp.processCollapse(loc, eval, clauseOps, iv); ++ ++ if (hostEvalInfo.empty() || !hostEvalInfo.back().apply(clauseOps, iv)) ++ cp.processCollapse(loc, eval, clauseOps, iv); ++ + clauseOps.loopInclusive = converter.getFirOpBuilder().getUnitAttr(); + } + +@@ -1215,7 +1613,10 @@ ClauseProcessor cp(converter, semaCtx, clauses); cp.processAllocate(clauseOps); - cp.processDefault(); cp.processIf(llvm::omp::Directive::OMPD_parallel, clauseOps); - cp.processNumThreads(stmtCtx, clauseOps); -- cp.processProcBind(clauseOps); - -- if (processReduction) { -- cp.processReduction(loc, clauseOps, &reductionTypes, &reductionSyms); -+ // Don't 
store num_threads clause operators into clauseOps because then they -+ // would always be added to the omp.parallel operation during its creation. -+ // We might need to attach them to the parent omp.target. -+ if (evalOutsideTarget) { -+ HostClausesInsertionGuard guard(converter.getFirOpBuilder()); -+ cp.processNumThreads(stmtCtx, numThreadsClauseOps); -+ } else { -+ cp.processNumThreads(stmtCtx, numThreadsClauseOps); - } + -+ cp.processProcBind(clauseOps); -+ cp.processReduction(loc, clauseOps, &reductionTypes, &reductionSyms); ++ if (hostEvalInfo.empty() || !hostEvalInfo.back().apply(clauseOps)) ++ cp.processNumThreads(stmtCtx, clauseOps); ++ + cp.processProcBind(clauseOps); + cp.processReduction(loc, clauseOps, reductionSyms); + } +@@ -1257,13 +1658,12 @@ + cp.processAllocate(clauseOps); + cp.processCopyprivate(loc, clauseOps); + cp.processNowait(clauseOps); +- // TODO Support delayed privatization. } - static void genSectionsClauses(lower::AbstractConverter &converter, -@@ -1083,7 +1168,7 @@ static void genTargetClauses( lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, - lower::StatementContext &stmtCtx, const List &clauses, -- mlir::Location loc, bool processHostOnlyClauses, bool processReduction, -+ mlir::Location loc, bool processHostOnlyClauses, - mlir::omp::TargetClauseOps &clauseOps, - llvm::SmallVectorImpl &mapSyms, - llvm::SmallVectorImpl &mapLocs, -@@ -1103,19 +1188,22 @@ - cp.processIsDevicePtr(clauseOps, devicePtrTypes, devicePtrLocs, - devicePtrSyms); - cp.processMap(loc, stmtCtx, clauseOps, &mapSyms, &mapLocs, &mapTypes); -- cp.processThreadLimit(stmtCtx, clauseOps); - - if (processHostOnlyClauses) - cp.processNowait(clauseOps); +- lower::StatementContext &stmtCtx, const List &clauses, +- mlir::Location loc, bool processHostOnlyClauses, ++ lower::StatementContext &stmtCtx, lower::pft::Evaluation &eval, ++ const List &clauses, mlir::Location loc, + mlir::omp::TargetOperands &clauseOps, + llvm::SmallVectorImpl &hasDeviceAddrSyms, + llvm::SmallVectorImpl &isDevicePtrSyms, +@@ -1272,22 +1672,27 @@ + cp.processDepend(clauseOps); + cp.processDevice(stmtCtx, clauseOps); + cp.processHasDeviceAddr(clauseOps, hasDeviceAddrSyms); ++ if (!hostEvalInfo.empty()) { ++ // Only process host_eval if compiling for the host device. ++ processHostEvalClauses(converter, semaCtx, stmtCtx, eval, loc); ++ hostEvalInfo.back().collectValues(clauseOps.hostEvalVars); ++ } + cp.processIf(llvm::omp::Directive::OMPD_target, clauseOps); + cp.processIsDevicePtr(clauseOps, isDevicePtrSyms); + cp.processMap(loc, stmtCtx, clauseOps, &mapSyms); +- +- if (processHostOnlyClauses) +- cp.processNowait(clauseOps); +- ++ cp.processNowait(clauseOps); + cp.processThreadLimit(stmtCtx, clauseOps); -+ cp.processThreadLimit(stmtCtx, clauseOps); -+ cp.processTODO(loc, -- llvm::omp::Directive::OMPD_target); -+ clause::InReduction, clause::UsesAllocators>( -+ loc, llvm::omp::Directive::OMPD_target); -+ + clause::InReduction, clause::UsesAllocators>( + loc, llvm::omp::Directive::OMPD_target); + + // TODO: Re-enable check after removing downstream early privatization support + // for `target`. - ++ // `target private(..)` is only supported in delayed privatization mode. 
- if (!enableDelayedPrivatizationStaging) - cp.processTODO(loc, llvm::omp::Directive::OMPD_target); @@ -5224,425 +5763,134 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/OpenMP.cp } static void genTargetDataClauses( -@@ -1207,11 +1295,14 @@ - loc, llvm::omp::Directive::OMPD_taskwait); +@@ -1347,7 +1752,6 @@ + cp.processMergeable(clauseOps); + cp.processPriority(stmtCtx, clauseOps); + cp.processUntied(clauseOps); +- // TODO Support delayed privatization. + + cp.processTODO(loc, llvm::omp::Directive::OMPD_task); +@@ -1381,19 +1785,21 @@ + cp.processNowait(clauseOps); } -static void genTeamsClauses(lower::AbstractConverter &converter, - semantics::SemanticsContext &semaCtx, - lower::StatementContext &stmtCtx, - const List &clauses, mlir::Location loc, -- mlir::omp::TeamsClauseOps &clauseOps) { -+static void -+genTeamsClauses(lower::AbstractConverter &converter, -+ semantics::SemanticsContext &semaCtx, -+ lower::StatementContext &stmtCtx, const List &clauses, -+ mlir::Location loc, bool evalOutsideTarget, -+ mlir::omp::TeamsClauseOps &clauseOps, -+ mlir::omp::NumTeamsClauseOps &numTeamsClauseOps, -+ mlir::omp::ThreadLimitClauseOps &threadLimitClauseOps) { +- mlir::omp::TeamsOperands &clauseOps) { ++static void genTeamsClauses( ++ lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, ++ lower::StatementContext &stmtCtx, const List &clauses, ++ mlir::Location loc, mlir::omp::TeamsOperands &clauseOps, ++ llvm::SmallVectorImpl &reductionSyms) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processAllocate(clauseOps); - cp.processDefault(); -@@ -1220,7 +1311,21 @@ - cp.processThreadLimit(stmtCtx, clauseOps); - // TODO Support delayed privatization. + cp.processIf(llvm::omp::Directive::OMPD_teams, clauseOps); +- cp.processNumTeams(stmtCtx, clauseOps); +- cp.processThreadLimit(stmtCtx, clauseOps); +- // TODO Support delayed privatization. - cp.processTODO(loc, llvm::omp::Directive::OMPD_teams); -+ // Evaluate NUM_TEAMS and THREAD_LIMIT on the host device, if currently inside -+ // of an omp.target operation. -+ // Don't store num_teams and thread_limit clause operators into clauseOps -+ // because then they would always be added to the omp.teams operation during -+ // its creation. We might need to attach them to the parent omp.target. 
-+ if (evalOutsideTarget) { -+ HostClausesInsertionGuard guard(converter.getFirOpBuilder()); -+ cp.processNumTeams(stmtCtx, numTeamsClauseOps); -+ cp.processThreadLimit(stmtCtx, threadLimitClauseOps); -+ } else { -+ cp.processNumTeams(stmtCtx, numTeamsClauseOps); -+ cp.processThreadLimit(stmtCtx, threadLimitClauseOps); ++ if (hostEvalInfo.empty() || !hostEvalInfo.back().apply(clauseOps)) { ++ cp.processNumTeams(stmtCtx, clauseOps); ++ cp.processThreadLimit(stmtCtx, clauseOps); + } + -+ // cp.processTODO(loc, llvm::omp::Directive::OMPD_teams); ++ cp.processReduction(loc, clauseOps, reductionSyms); } static void genWsloopClauses( -@@ -1284,12 +1389,24 @@ - } - - static mlir::omp::DistributeOp --genDistributeOp(lower::AbstractConverter &converter, lower::SymMap &symTable, -- semantics::SemanticsContext &semaCtx, -- lower::pft::Evaluation &eval, mlir::Location loc, -- const ConstructQueue &queue, ConstructQueue::iterator item) { -- TODO(loc, "Distribute construct"); -- return nullptr; -+genDistributeWrapperOp(lower::AbstractConverter &converter, -+ semantics::SemanticsContext &semaCtx, -+ lower::pft::Evaluation &eval, mlir::Location loc, -+ const mlir::omp::DistributeClauseOps &clauseOps, -+ DataSharingProcessor &dsp) { -+ fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); -+ -+ // Create omp.distribute wrapper. -+ auto distributeOp = -+ firOpBuilder.create(loc, clauseOps); -+ -+ // TODO: Populate entry block arguments with reduction variables. -+ firOpBuilder.createBlock(&distributeOp.getRegion()); -+ -+ firOpBuilder.setInsertionPoint( -+ lower::genOpenMPTerminator(firOpBuilder, distributeOp, loc)); -+ -+ return distributeOp; - } - - static mlir::omp::FlushOp -@@ -1305,6 +1422,55 @@ - converter.getCurrentLocation(), operandRange); - } - -+static mlir::omp::LoopNestOp -+genLoopNestOp(lower::AbstractConverter &converter, lower::SymMap &symTable, -+ semantics::SemanticsContext &semaCtx, -+ lower::pft::Evaluation &eval, mlir::Location loc, -+ const ConstructQueue &queue, ConstructQueue::iterator item, -+ mlir::omp::LoopNestClauseOps &clauseOps, -+ llvm::ArrayRef iv, -+ llvm::ArrayRef wrapperSyms, -+ llvm::ArrayRef wrapperArgs, -+ llvm::omp::Directive directive, DataSharingProcessor &dsp) { -+ fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); -+ -+ auto ivCallback = [&](mlir::Operation *op) { -+ genLoopVars(op, converter, loc, iv, wrapperSyms, wrapperArgs); -+ return llvm::SmallVector(iv); -+ }; -+ -+ auto *nestedEval = -+ getCollapsedLoopEval(eval, getCollapseValue(item->clauses)); +@@ -1478,6 +1884,7 @@ + std::pair> + wrapperArgs, + llvm::omp::Directive directive, DataSharingProcessor &dsp) { ++ + auto ivCallback = [&](mlir::Operation *op) { + genLoopVars(op, converter, loc, iv, wrapperArgs); + return llvm::SmallVector(iv); +@@ -1486,13 +1893,15 @@ + auto *nestedEval = + getCollapsedLoopEval(eval, getCollapseValue(item->clauses)); + +- return genOpWithBody( + auto loopNestOp = genOpWithBody( -+ OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *nestedEval, -+ directive) -+ .setClauses(&item->clauses) -+ .setDataSharingProcessor(&dsp) -+ .setGenRegionEntryCb(ivCallback), -+ queue, item, clauseOps); -+ -+ // Create trip_count if inside of omp.target and this is host compilation -+ auto offloadMod = llvm::dyn_cast( -+ firOpBuilder.getModule().getOperation()); -+ auto targetOp = loopNestOp->getParentOfType(); -+ -+ if (offloadMod && targetOp && !offloadMod.getIsTargetDevice() && -+ targetOp.isTargetSPMDLoop()) { -+ // Lower loop bounds and step, and process collapsing 
again, putting lowered -+ // values outside of omp.target this time. This enables calculating and -+ // accessing the trip count in the host, which is needed when lowering to -+ // LLVM IR via the OMPIRBuilder. -+ HostClausesInsertionGuard guard(firOpBuilder); -+ mlir::omp::CollapseClauseOps collapseClauseOps; -+ llvm::SmallVector iv; -+ ClauseProcessor cp(converter, semaCtx, item->clauses); -+ cp.processCollapse(loc, eval, collapseClauseOps, iv); -+ targetOp.getTripCountMutable().assign(calculateTripCount( -+ converter.getFirOpBuilder(), loc, collapseClauseOps)); -+ } -+ return loopNestOp; -+} -+ - static mlir::omp::MasterOp - genMasterOp(lower::AbstractConverter &converter, lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, -@@ -1344,40 +1510,56 @@ - semantics::SemanticsContext &semaCtx, - lower::pft::Evaluation &eval, mlir::Location loc, - const ConstructQueue &queue, ConstructQueue::iterator item, -- bool outerCombined = false) { -+ mlir::omp::ParallelClauseOps &clauseOps, -+ mlir::omp::NumThreadsClauseOps &numThreadsClauseOps, -+ llvm::ArrayRef reductionSyms, -+ llvm::ArrayRef reductionTypes, -+ mlir::omp::TargetOp parentTarget = nullptr) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); -- lower::StatementContext stmtCtx; -- mlir::omp::ParallelClauseOps clauseOps; -- llvm::SmallVector reductionTypes; -- llvm::SmallVector reductionSyms; -- genParallelClauses(converter, semaCtx, stmtCtx, item->clauses, loc, -- /*processReduction=*/!outerCombined, clauseOps, -- reductionTypes, reductionSyms); - - auto reductionCallback = [&](mlir::Operation *op) { -- genReductionVars(op, converter, loc, reductionSyms, reductionTypes); -- return reductionSyms; -+ llvm::SmallVector blockArgLocs(reductionSyms.size(), loc); -+ -+ mlir::Block *entryBlock = firOpBuilder.createBlock( -+ &op->getRegion(0), {}, reductionTypes, blockArgLocs); -+ -+ // Bind the reduction arguments to their block arguments. 
-+ for (auto [arg, prv] : -+ llvm::zip_equal(reductionSyms, entryBlock->getArguments())) -+ converter.bindSymbol(*arg, prv); -+ return llvm::SmallVector(reductionSyms); - }; - - OpWithBodyGenInfo genInfo = - OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, - llvm::omp::Directive::OMPD_parallel) -- .setOuterCombined(outerCombined) + OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *nestedEval, + directive) .setClauses(&item->clauses) - .setReductions(&reductionSyms, &reductionTypes) - .setGenRegionEntryCb(reductionCallback); - -- if (!enableDelayedPrivatization) -- return genOpWithBody(genInfo, queue, item, -- clauseOps); -+ if (!enableDelayedPrivatization) { -+ auto parallelOp = -+ genOpWithBody(genInfo, queue, item, clauseOps); -+ if (numThreadsClauseOps.numThreadsVar) { -+ if (parentTarget) -+ parentTarget.getNumThreadsMutable().assign( -+ numThreadsClauseOps.numThreadsVar); -+ else -+ parallelOp.getNumThreadsVarMutable().assign( -+ numThreadsClauseOps.numThreadsVar); -+ } -+ return parallelOp; -+ } + .setDataSharingProcessor(&dsp) + .setGenRegionEntryCb(ivCallback), + queue, item, clauseOps); ++ ++ return loopNestOp; + } -- bool privatize = !outerCombined; + static void genLoopOp(lower::AbstractConverter &converter, +@@ -1509,7 +1918,8 @@ DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, - lower::omp::isLastItemInQueue(item, queue), + /*shouldCollectPreDeterminedSymbols=*/true, /*useDelayedPrivatization=*/true, &symTable); +- dsp.processStep1(&loopClauseOps); + dsp.processStep1(); -+ dsp.processStep2(); - -- if (privatize) -- dsp.processStep1(&clauseOps); -+ const auto &privateClauseOps = dsp.getPrivateClauseOps(); -+ clauseOps.privateVars = privateClauseOps.privateVars; -+ clauseOps.privatizers = privateClauseOps.privatizers; - - auto genRegionEntryCB = [&](mlir::Operation *op) { - auto parallelOp = llvm::cast(op); -@@ -1386,7 +1568,7 @@ - clauseOps.reductionVars.size(), loc); - - llvm::SmallVector allRegionArgTypes; -- mergePrivateVarsInfo(parallelOp, llvm::ArrayRef(reductionTypes), -+ mergePrivateVarsInfo(parallelOp, reductionTypes, - llvm::function_ref{ - [](mlir::Value v) { return v.getType(); }}, - allRegionArgTypes); -@@ -1401,9 +1583,9 @@ - firOpBuilder.createBlock(®ion, /*insertPt=*/{}, allRegionArgTypes, - allRegionArgLocs); - -- llvm::SmallVector allSymbols = reductionSyms; -- allSymbols.append(dsp.getAllSymbolsToPrivatize().begin(), -- dsp.getAllSymbolsToPrivatize().end()); -+ llvm::SmallVector allSymbols(reductionSyms); -+ allSymbols.append(dsp.getDelayedPrivSyms().begin(), -+ dsp.getDelayedPrivSyms().end()); - - for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments())) { - converter.bindSymbol(*arg, hlfir::translateToExtendedValue( -@@ -1418,7 +1600,62 @@ - }; - - genInfo.setGenRegionEntryCb(genRegionEntryCB).setDataSharingProcessor(&dsp); -- return genOpWithBody(genInfo, queue, item, clauseOps); -+ auto parallelOp = -+ genOpWithBody(genInfo, queue, item, clauseOps); -+ if (numThreadsClauseOps.numThreadsVar) { -+ if (parentTarget) -+ parentTarget.getNumThreadsMutable().assign( -+ numThreadsClauseOps.numThreadsVar); -+ else -+ parallelOp.getNumThreadsVarMutable().assign( -+ numThreadsClauseOps.numThreadsVar); -+ } -+ return parallelOp; -+} -+ -+static mlir::omp::ParallelOp genParallelWrapperOp( -+ lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, -+ lower::pft::Evaluation &eval, mlir::Location loc, -+ const mlir::omp::ParallelClauseOps &clauseOps, -+ mlir::omp::NumThreadsClauseOps &numThreadsClauseOps, 
-+ llvm::ArrayRef reductionSyms, -+ llvm::ArrayRef reductionTypes, mlir::omp::TargetOp parentTarget, -+ DataSharingProcessor &dsp) { -+ fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); -+ -+ // Create omp.parallel wrapper. -+ auto parallelOp = firOpBuilder.create(loc, clauseOps); -+ -+ if (numThreadsClauseOps.numThreadsVar) { -+ if (parentTarget) -+ parentTarget.getNumThreadsMutable().assign( -+ numThreadsClauseOps.numThreadsVar); -+ else -+ parallelOp.getNumThreadsVarMutable().assign( -+ numThreadsClauseOps.numThreadsVar); -+ } -+ -+ // Populate entry block arguments with reduction and private variables. -+ mlir::OperandRange privateVars = parallelOp.getPrivateVars(); -+ -+ llvm::SmallVector blockArgTypes(reductionTypes.begin(), -+ reductionTypes.end()); -+ blockArgTypes.reserve(blockArgTypes.size() + privateVars.size()); -+ llvm::transform(privateVars, std::back_inserter(blockArgTypes), -+ [](mlir::Value v) { return v.getType(); }); -+ -+ llvm::SmallVector blockArgLocs(reductionTypes.size(), loc); -+ blockArgLocs.reserve(blockArgLocs.size() + privateVars.size()); -+ llvm::transform(privateVars, std::back_inserter(blockArgLocs), -+ [](mlir::Value v) { return v.getLoc(); }); -+ -+ firOpBuilder.createBlock(¶llelOp.getRegion(), {}, blockArgTypes, -+ blockArgLocs); -+ -+ firOpBuilder.setInsertionPoint( -+ lower::genOpenMPTerminator(firOpBuilder, parallelOp, loc)); -+ -+ return parallelOp; - } ++ dsp.processStep2(&loopClauseOps); - static mlir::omp::SectionOp -@@ -1443,13 +1680,15 @@ - mlir::omp::SectionsClauseOps clauseOps; - genSectionsClauses(converter, semaCtx, item->clauses, loc, clauseOps); - -- auto &builder = converter.getFirOpBuilder(); -+ auto &firOpBuilder = converter.getFirOpBuilder(); - - // Insert privatizations before SECTIONS - symTable.pushScope(); -+ // TODO: Add support for delayed privatization. + mlir::omp::LoopNestOperands loopNestClauseOps; + llvm::SmallVector iv; +@@ -1631,6 +2041,8 @@ DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, lower::omp::isLastItemInQueue(item, queue)); dsp.processStep1(); ++ // TODO: Add support for delayed privatization. 
+ dsp.processStep2(); List nonDsaClauses; List lastprivates; -@@ -1488,9 +1727,9 @@ - assert(lastSectionOp != body.rend()); - - for (const clause::Lastprivate *lastp : lastprivates) { -- builder.setInsertionPoint( -+ firOpBuilder.setInsertionPoint( - lastSectionOp->getRegion(0).back().getTerminator()); -- mlir::OpBuilder::InsertPoint insp = builder.saveInsertionPoint(); -+ mlir::OpBuilder::InsertPoint insp = firOpBuilder.saveInsertionPoint(); - const auto &objList = std::get(lastp->t); - for (const Object &object : objList) { - semantics::Symbol *sym = object.sym(); -@@ -1500,65 +1739,35 @@ - } +@@ -1687,8 +2099,8 @@ + } + + ConstructQueue sectionQueue{buildConstructQueue( +- converter.getFirOpBuilder().getModule(), semaCtx, nestedEval, +- sectionConstruct->source, llvm::omp::Directive::OMPD_section, {})}; ++ builder.getModule(), semaCtx, nestedEval, sectionConstruct->source, ++ llvm::omp::Directive::OMPD_section, {})}; + + builder.setInsertionPoint(terminator); + genOpWithBody( +@@ -1724,7 +2136,7 @@ // Perform DataSharingProcessor's step2 out of SECTIONS -- builder.setInsertionPointAfter(sectionsOp.getOperation()); + builder.setInsertionPointAfter(sectionsOp.getOperation()); - dsp.processStep2(sectionsOp, false); -+ firOpBuilder.setInsertionPointAfter(sectionsOp.getOperation()); + dsp.processStep3(sectionsOp, false); // Emit implicit barrier to synchronize threads and avoid data // races on post-update of lastprivate variables when `nowait` // clause is present. - if (clauseOps.nowaitAttr && !lastprivates.empty()) -- builder.create(loc); -+ firOpBuilder.create(loc); - - symTable.popScope(); - return sectionsOp; - } - --static mlir::omp::SimdOp --genSimdOp(lower::AbstractConverter &converter, lower::SymMap &symTable, -- semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, -- mlir::Location loc, const ConstructQueue &queue, -- ConstructQueue::iterator item) { -+static mlir::omp::SimdOp genSimdWrapperOp(lower::AbstractConverter &converter, -+ semantics::SemanticsContext &semaCtx, -+ lower::pft::Evaluation &eval, -+ mlir::Location loc, -+ mlir::omp::SimdClauseOps &clauseOps, -+ DataSharingProcessor &dsp) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); -- symTable.pushScope(); -- DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, -- lower::omp::isLastItemInQueue(item, queue)); -- dsp.processStep1(); -- -- lower::StatementContext stmtCtx; -- mlir::omp::LoopNestClauseOps loopClauseOps; -- mlir::omp::SimdClauseOps simdClauseOps; -- llvm::SmallVector iv; -- genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, -- loopClauseOps, iv); -- genSimdClauses(converter, semaCtx, item->clauses, loc, simdClauseOps); - - // Create omp.simd wrapper. -- auto simdOp = firOpBuilder.create(loc, simdClauseOps); -+ auto simdOp = firOpBuilder.create(loc, clauseOps); - -- // TODO: Add reduction-related arguments to the wrapper's entry block. -+ // TODO: Populate entry block arguments with reduction variables. - firOpBuilder.createBlock(&simdOp.getRegion()); -+ - firOpBuilder.setInsertionPoint( - lower::genOpenMPTerminator(firOpBuilder, simdOp, loc)); - -- // Create nested omp.loop_nest and fill body with loop contents. 
-- auto loopOp = firOpBuilder.create(loc, loopClauseOps); -- -- auto *nestedEval = -- getCollapsedLoopEval(eval, getCollapseValue(item->clauses)); -- -- auto ivCallback = [&](mlir::Operation *op) { -- genLoopVars(op, converter, loc, iv); -- return iv; -- }; -- -- createBodyOfOp(*loopOp, -- OpWithBodyGenInfo(converter, symTable, semaCtx, loc, -- *nestedEval, llvm::omp::Directive::OMPD_simd) -- .setClauses(&item->clauses) -- .setDataSharingProcessor(&dsp) -- .setGenRegionEntryCb(ivCallback), -- queue, item); -- -- symTable.popScope(); - return simdOp; - } - -@@ -1581,7 +1790,7 @@ - genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, - mlir::Location loc, const ConstructQueue &queue, -- ConstructQueue::iterator item, bool outerCombined = false) { -+ ConstructQueue::iterator item) { +@@ -1765,23 +2177,27 @@ + ConstructQueue::const_iterator item) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); lower::StatementContext stmtCtx; +- +- bool processHostOnlyClauses = +- !llvm::cast(*converter.getModuleOp()) +- .getIsTargetDevice(); ++ bool isTargetDevice = ++ llvm::cast(*converter.getModuleOp()) ++ .getIsTargetDevice(); ++ ++ // Introduce a new host_eval information structure for this target region. ++ if (!isTargetDevice) ++ hostEvalInfo.emplace_back(); + + mlir::omp::TargetOperands clauseOps; + llvm::SmallVector mapSyms, isDevicePtrSyms, + hasDeviceAddrSyms; +- genTargetClauses(converter, semaCtx, stmtCtx, item->clauses, loc, +- processHostOnlyClauses, clauseOps, hasDeviceAddrSyms, +- isDevicePtrSyms, mapSyms); ++ genTargetClauses(converter, semaCtx, stmtCtx, eval, item->clauses, loc, ++ clauseOps, hasDeviceAddrSyms, isDevicePtrSyms, mapSyms); -@@ -1595,17 +1804,23 @@ - llvm::SmallVector mapLocs, devicePtrLocs, deviceAddrLocs; - llvm::SmallVector mapTypes, devicePtrTypes, deviceAddrTypes; - genTargetClauses(converter, semaCtx, stmtCtx, item->clauses, loc, -- processHostOnlyClauses, /*processReduction=*/outerCombined, -- clauseOps, mapSyms, mapLocs, mapTypes, deviceAddrSyms, -- deviceAddrLocs, deviceAddrTypes, devicePtrSyms, -- devicePtrLocs, devicePtrTypes); -+ processHostOnlyClauses, clauseOps, mapSyms, mapLocs, -+ mapTypes, deviceAddrSyms, deviceAddrLocs, deviceAddrTypes, -+ devicePtrSyms, devicePtrLocs, devicePtrTypes); - -- llvm::SmallVector privateSyms; DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/ lower::omp::isLastItemInQueue(item, queue), @@ -5650,672 +5898,322 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/OpenMP.cp - dsp.processStep1(&clauseOps); + enableDelayedPrivatizationStaging, &symTable); + dsp.processStep1(); -+ -+ if (enableDelayedPrivatizationStaging) { -+ dsp.processStep2(); -+ -+ const auto &privateClauseOps = dsp.getPrivateClauseOps(); -+ clauseOps.privateVars = privateClauseOps.privateVars; -+ clauseOps.privatizers = privateClauseOps.privatizers; -+ } ++ if (enableDelayedPrivatizationStaging) ++ dsp.processStep2(&clauseOps); // 5.8.1 Implicit Data-Mapping Attribute Rules // The following code follows the implicit data-mapping rules to map all the -@@ -1700,6 +1915,7 @@ +@@ -1805,84 +2221,83 @@ + if (llvm::is_contained(mapSyms, common)) + return; + +- // If we come across a symbol without a symbol address, we +- // return as we cannot process it, this is intended as a +- // catch all early exit for symbols that do not have a +- // corresponding extended value. 
Such as subroutines, +- // interfaces and named blocks. ++ // If we come across a symbol without a symbol address, we return as we ++ // cannot process it, this is intended as a catch all early exit for ++ // symbols that do not have a corresponding extended value. Such as ++ // subroutines, interfaces and named blocks. + if (!converter.getSymbolAddress(sym)) + return; + +- if (!llvm::is_contained(mapSyms, &sym)) { +- if (const auto *details = +- sym.template detailsIf()) +- converter.copySymbolBinding(details->symbol(), sym); +- llvm::SmallVector bounds; +- std::stringstream name; +- fir::ExtendedValue dataExv = converter.getSymbolExtendedValue(sym); +- name << sym.name().ToString(); +- +- lower::AddrAndBoundsInfo info = getDataOperandBaseAddr( +- converter, firOpBuilder, sym, converter.getCurrentLocation()); +- mlir::Value baseOp = info.rawInput; +- if (mlir::isa(fir::unwrapRefType(baseOp.getType()))) +- bounds = lower::genBoundsOpsFromBox( +- firOpBuilder, converter.getCurrentLocation(), dataExv, info); +- if (mlir::isa(fir::unwrapRefType(baseOp.getType()))) { +- bool dataExvIsAssumedSize = +- semantics::IsAssumedSizeArray(sym.GetUltimate()); +- bounds = lower::genBaseBoundsOps( +- firOpBuilder, converter.getCurrentLocation(), dataExv, +- dataExvIsAssumedSize); +- } ++ if (llvm::is_contained(mapSyms, &sym)) ++ return; + +- llvm::omp::OpenMPOffloadMappingFlags mapFlag = +- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; +- mlir::omp::VariableCaptureKind captureKind = +- mlir::omp::VariableCaptureKind::ByRef; +- +- mlir::Type eleType = baseOp.getType(); +- if (auto refType = mlir::dyn_cast(baseOp.getType())) +- eleType = refType.getElementType(); +- +- // If a variable is specified in declare target link and if device +- // type is not specified as `nohost`, it needs to be mapped tofrom +- mlir::ModuleOp mod = firOpBuilder.getModule(); +- mlir::Operation *op = mod.lookupSymbol(converter.mangleName(sym)); +- auto declareTargetOp = +- llvm::dyn_cast_if_present(op); +- if (declareTargetOp && declareTargetOp.isDeclareTarget()) { +- if (declareTargetOp.getDeclareTargetCaptureClause() == +- mlir::omp::DeclareTargetCaptureClause::link && +- declareTargetOp.getDeclareTargetDeviceType() != +- mlir::omp::DeclareTargetDeviceType::nohost) { +- mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; +- mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; +- } +- } else if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) { +- captureKind = mlir::omp::VariableCaptureKind::ByCopy; +- } else if (!fir::isa_builtin_cptr_type(eleType)) { ++ if (const auto *details = ++ sym.template detailsIf()) ++ converter.copySymbolBinding(details->symbol(), sym); ++ llvm::SmallVector bounds; ++ std::stringstream name; ++ fir::ExtendedValue dataExv = converter.getSymbolExtendedValue(sym); ++ name << sym.name().ToString(); ++ ++ lower::AddrAndBoundsInfo info = getDataOperandBaseAddr( ++ converter, firOpBuilder, sym, converter.getCurrentLocation()); ++ mlir::Value baseOp = info.rawInput; ++ if (mlir::isa(fir::unwrapRefType(baseOp.getType()))) ++ bounds = lower::genBoundsOpsFromBox( ++ firOpBuilder, converter.getCurrentLocation(), dataExv, info); ++ if (mlir::isa(fir::unwrapRefType(baseOp.getType()))) { ++ bool dataExvIsAssumedSize = ++ semantics::IsAssumedSizeArray(sym.GetUltimate()); ++ bounds = lower::genBaseBoundsOps( ++ firOpBuilder, converter.getCurrentLocation(), dataExv, ++ dataExvIsAssumedSize); ++ } ++ ++ llvm::omp::OpenMPOffloadMappingFlags mapFlag = ++ 
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; ++ mlir::omp::VariableCaptureKind captureKind = ++ mlir::omp::VariableCaptureKind::ByRef; ++ ++ mlir::Type eleType = baseOp.getType(); ++ if (auto refType = mlir::dyn_cast(baseOp.getType())) ++ eleType = refType.getElementType(); ++ ++ // If a variable is specified in declare target link and if device ++ // type is not specified as `nohost`, it needs to be mapped tofrom ++ mlir::ModuleOp mod = firOpBuilder.getModule(); ++ mlir::Operation *op = mod.lookupSymbol(converter.mangleName(sym)); ++ auto declareTargetOp = ++ llvm::dyn_cast_if_present(op); ++ if (declareTargetOp && declareTargetOp.isDeclareTarget()) { ++ if (declareTargetOp.getDeclareTargetCaptureClause() == ++ mlir::omp::DeclareTargetCaptureClause::link && ++ declareTargetOp.getDeclareTargetDeviceType() != ++ mlir::omp::DeclareTargetDeviceType::nohost) { + mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; + mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; + } +- auto location = +- mlir::NameLoc::get(mlir::StringAttr::get(firOpBuilder.getContext(), +- sym.name().ToString()), +- baseOp.getLoc()); +- mlir::Value mapOp = createMapInfoOp( +- firOpBuilder, location, baseOp, /*varPtrPtr=*/mlir::Value{}, +- name.str(), bounds, /*members=*/{}, +- /*membersIndex=*/mlir::ArrayAttr{}, +- static_cast< +- std::underlying_type_t>( +- mapFlag), +- captureKind, baseOp.getType()); ++ } else if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) { ++ captureKind = mlir::omp::VariableCaptureKind::ByCopy; ++ } else if (!fir::isa_builtin_cptr_type(eleType)) { ++ mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; ++ mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; ++ } ++ auto location = mlir::NameLoc::get( ++ mlir::StringAttr::get(firOpBuilder.getContext(), sym.name().ToString()), ++ baseOp.getLoc()); ++ mlir::Value mapOp = createMapInfoOp( ++ firOpBuilder, location, baseOp, /*varPtrPtr=*/mlir::Value{}, name.str(), ++ bounds, /*members=*/{}, ++ /*membersIndex=*/mlir::ArrayAttr{}, ++ static_cast< ++ std::underlying_type_t>( ++ mapFlag), ++ captureKind, baseOp.getType()); + +- clauseOps.mapVars.push_back(mapOp); +- mapSyms.push_back(&sym); +- } ++ clauseOps.mapVars.push_back(mapOp); ++ mapSyms.push_back(&sym); }; lower::pft::visitAllSymbols(eval, captureImplicitMap); +@@ -1892,6 +2307,7 @@ + extractMappedBaseValues(clauseOps.mapVars, mapBaseValues); + + EntryBlockArgs args; ++ args.hostEvalVars = clauseOps.hostEvalVars; + // TODO: Add in_reduction syms and vars. + args.map.syms = mapSyms; + args.map.vars = mapBaseValues; +@@ -1900,6 +2316,10 @@ + + genBodyOfTargetOp(converter, symTable, semaCtx, eval, targetOp, args, loc, + queue, item, dsp); + - auto targetOp = firOpBuilder.create(loc, clauseOps); - genBodyOfTargetOp(converter, symTable, semaCtx, eval, targetOp, mapSyms, - mapLocs, mapTypes, dsp, loc, queue, item); -@@ -1788,11 +2004,10 @@ - queue, item, clauseOps); ++ // Remove the host_eval information structure created for this target region. 
++ if (!isTargetDevice) ++ hostEvalInfo.pop_back(); + return targetOp; } --static mlir::omp::TaskloopOp --genTaskloopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, -- semantics::SemanticsContext &semaCtx, -- lower::pft::Evaluation &eval, mlir::Location loc, -- const ConstructQueue &queue, ConstructQueue::iterator item) { -+static mlir::omp::TaskloopOp genTaskloopWrapperOp( -+ lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, -+ lower::pft::Evaluation &eval, mlir::Location loc, -+ const ConstructQueue &queue, ConstructQueue::iterator item) { - TODO(loc, "Taskloop construct"); - } +@@ -1982,7 +2402,8 @@ + DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, + lower::omp::isLastItemInQueue(item, queue), + /*useDelayedPrivatization=*/true, &symTable); +- dsp.processStep1(&clauseOps); ++ dsp.processStep1(); ++ dsp.processStep2(&clauseOps); -@@ -1819,72 +2034,187 @@ - genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, + EntryBlockArgs taskArgs; + taskArgs.priv.syms = dsp.getDelayedPrivSymbols(); +@@ -2066,14 +2487,33 @@ mlir::Location loc, const ConstructQueue &queue, -- ConstructQueue::iterator item, bool outerCombined = false) { -+ ConstructQueue::iterator item) { + ConstructQueue::const_iterator item) { lower::StatementContext stmtCtx; + -+ auto offloadModOp = llvm::cast( -+ converter.getModuleOp().getOperation()); -+ mlir::omp::TargetOp targetOp = -+ findParentTargetOp(converter.getFirOpBuilder()); -+ bool mustEvalOutsideTarget = targetOp && !offloadModOp.getIsTargetDevice(); -+ - mlir::omp::TeamsClauseOps clauseOps; + mlir::omp::TeamsOperands clauseOps; - genTeamsClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps); -+ mlir::omp::NumTeamsClauseOps numTeamsClauseOps; -+ mlir::omp::ThreadLimitClauseOps threadLimitClauseOps; -+ genTeamsClauses(converter, semaCtx, stmtCtx, item->clauses, loc, -+ mustEvalOutsideTarget, clauseOps, numTeamsClauseOps, -+ threadLimitClauseOps); ++ llvm::SmallVector reductionSyms; ++ genTeamsClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps, ++ reductionSyms); ++ ++ EntryBlockArgs args; ++ // TODO: Add private syms and vars. 
++ args.reduction.syms = reductionSyms; ++ args.reduction.vars = clauseOps.reductionVars; - return genOpWithBody( ++ auto genRegionEntryCB = [&](mlir::Operation *op) { ++ genEntryBlock(converter, args, op->getRegion(0)); ++ bindEntryBlockArgs( ++ converter, llvm::cast(op), args); ++ return llvm::to_vector(llvm::concat( ++ args.priv.syms, args.reduction.syms)); ++ }; ++ + auto teamsOp = genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, llvm::omp::Directive::OMPD_teams) -- .setOuterCombined(outerCombined) - .setClauses(&item->clauses), +- .setClauses(&item->clauses), ++ .setClauses(&item->clauses) ++ .setGenRegionEntryCb(genRegionEntryCB), queue, item, clauseOps); + -+ if (numTeamsClauseOps.numTeamsUpperVar) { -+ if (mustEvalOutsideTarget) -+ targetOp.getNumTeamsUpperMutable().assign( -+ numTeamsClauseOps.numTeamsUpperVar); -+ else -+ teamsOp.getNumTeamsUpperMutable().assign( -+ numTeamsClauseOps.numTeamsUpperVar); -+ } -+ -+ if (threadLimitClauseOps.threadLimitVar) { -+ if (mustEvalOutsideTarget) -+ targetOp.getTeamsThreadLimitMutable().assign( -+ threadLimitClauseOps.threadLimitVar); -+ else -+ teamsOp.getThreadLimitMutable().assign( -+ threadLimitClauseOps.threadLimitVar); -+ } -+ + return teamsOp; } --static mlir::omp::WsloopOp --genWsloopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, -- semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, -- mlir::Location loc, const ConstructQueue &queue, -- ConstructQueue::iterator item) { -+static mlir::omp::WsloopOp genWsloopWrapperOp( -+ lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, -+ lower::pft::Evaluation &eval, mlir::Location loc, -+ const mlir::omp::WsloopClauseOps &clauseOps, -+ llvm::ArrayRef reductionSyms, -+ llvm::ArrayRef reductionTypes, DataSharingProcessor &dsp) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); -- symTable.pushScope(); -- DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, -- lower::omp::isLastItemInQueue(item, queue)); -- dsp.processStep1(); - -- lower::StatementContext stmtCtx; -- mlir::omp::LoopNestClauseOps loopClauseOps; -- mlir::omp::WsloopClauseOps wsClauseOps; -- llvm::SmallVector iv; -- llvm::SmallVector reductionTypes; -- llvm::SmallVector reductionSyms; -- genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, -- loopClauseOps, iv); -- genWsloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc, wsClauseOps, -- reductionTypes, reductionSyms); -- -- // Create omp.wsloop wrapper and populate entry block arguments with reduction -- // variables. -- auto wsloopOp = firOpBuilder.create(loc, wsClauseOps); -+ // Create omp.wsloop wrapper. - llvm::SmallVector reductionLocs(reductionSyms.size(), loc); -- mlir::Block *wsloopEntryBlock = firOpBuilder.createBlock( -- &wsloopOp.getRegion(), {}, reductionTypes, reductionLocs); -+ auto wsloopOp = firOpBuilder.create(loc, clauseOps); -+ -+ // Populate entry block arguments with reduction variables. -+ firOpBuilder.createBlock(&wsloopOp.getRegion(), {}, reductionTypes, -+ reductionLocs); -+ - firOpBuilder.setInsertionPoint( - lower::genOpenMPTerminator(firOpBuilder, wsloopOp, loc)); - -- // Create nested omp.loop_nest and fill body with loop contents. 
-- auto loopOp = firOpBuilder.create(loc, loopClauseOps); -+ return wsloopOp; -+} + //===----------------------------------------------------------------------===// +@@ -2097,7 +2537,8 @@ + DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, + /*shouldCollectPreDeterminedSymbols=*/true, + enableDelayedPrivatizationStaging, &symTable); +- dsp.processStep1(&distributeClauseOps); ++ dsp.processStep1(); ++ dsp.processStep2(&distributeClauseOps); -- auto *nestedEval = -- getCollapsedLoopEval(eval, getCollapseValue(item->clauses)); -+//===----------------------------------------------------------------------===// -+// Code generation functions for the standalone version of constructs that can -+// be a leaf in a composite construct -+//===----------------------------------------------------------------------===// + mlir::omp::LoopNestOperands loopNestClauseOps; + llvm::SmallVector iv; +@@ -2122,7 +2563,6 @@ + const ConstructQueue &queue, + ConstructQueue::const_iterator item) { + lower::StatementContext stmtCtx; +- + mlir::omp::WsloopOperands wsloopClauseOps; + llvm::SmallVector wsloopReductionSyms; + genWsloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc, +@@ -2133,6 +2573,7 @@ + /*shouldCollectPreDeterminedSymbols=*/true, + /*useDelayedPrivatization=*/false, &symTable); + dsp.processStep1(); ++ dsp.processStep2(); -- auto ivCallback = [&](mlir::Operation *op) { -- genLoopVars(op, converter, loc, iv, reductionSyms, -- wsloopEntryBlock->getArguments()); -- return iv; -- }; -+static void genStandaloneDistribute( -+ lower::AbstractConverter &converter, lower::SymMap &symTable, -+ semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, -+ mlir::Location loc, const ConstructQueue &queue, -+ ConstructQueue::iterator item, DataSharingProcessor &dsp) { -+ mlir::omp::DistributeClauseOps distributeClauseOps; -+ // TODO: Process DISTRIBUTE clauses - -- createBodyOfOp(*loopOp, -- OpWithBodyGenInfo(converter, symTable, semaCtx, loc, -- *nestedEval, llvm::omp::Directive::OMPD_do) -- .setClauses(&item->clauses) -- .setDataSharingProcessor(&dsp) -- .setReductions(&reductionSyms, &reductionTypes) -- .setGenRegionEntryCb(ivCallback), -- queue, item); -- symTable.popScope(); -- return wsloopOp; -+ mlir::omp::LoopNestClauseOps loopNestClauseOps; -+ llvm::SmallVector iv; -+ genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, -+ loopNestClauseOps, iv); -+ -+ auto distributeOp = genDistributeWrapperOp(converter, semaCtx, eval, loc, -+ distributeClauseOps, dsp); -+ -+ genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, -+ loopNestClauseOps, iv, -+ /*wrapperSyms=*/{}, distributeOp.getRegion().getArguments(), -+ llvm::omp::Directive::OMPD_distribute, dsp); -+} -+ -+static void genStandaloneDo(lower::AbstractConverter &converter, -+ lower::SymMap &symTable, -+ semantics::SemanticsContext &semaCtx, -+ lower::pft::Evaluation &eval, mlir::Location loc, -+ const ConstructQueue &queue, -+ ConstructQueue::iterator item, -+ DataSharingProcessor &dsp) { -+ lower::StatementContext stmtCtx; -+ -+ mlir::omp::WsloopClauseOps wsloopClauseOps; -+ llvm::SmallVector reductionSyms; -+ llvm::SmallVector reductionTypes; -+ genWsloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc, -+ wsloopClauseOps, reductionTypes, reductionSyms); -+ -+ mlir::omp::LoopNestClauseOps loopNestClauseOps; -+ llvm::SmallVector iv; -+ genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, -+ loopNestClauseOps, iv); -+ -+ auto wsloopOp = -+ genWsloopWrapperOp(converter, semaCtx, 
eval, loc, wsloopClauseOps, -+ reductionSyms, reductionTypes, dsp); -+ -+ genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, -+ loopNestClauseOps, iv, reductionSyms, -+ wsloopOp.getRegion().getArguments(), -+ llvm::omp::Directive::OMPD_do, dsp); -+} -+ -+static void genStandaloneParallel(lower::AbstractConverter &converter, -+ lower::SymMap &symTable, -+ semantics::SemanticsContext &semaCtx, -+ lower::pft::Evaluation &eval, -+ mlir::Location loc, -+ const ConstructQueue &queue, -+ ConstructQueue::iterator item) { -+ lower::StatementContext stmtCtx; -+ -+ auto offloadModOp = -+ llvm::cast(*converter.getModuleOp()); -+ mlir::omp::TargetOp targetOp = -+ findParentTargetOp(converter.getFirOpBuilder()); -+ bool evalOutsideTarget = -+ targetOp && !offloadModOp.getIsTargetDevice() && !evalHasSiblings(eval); -+ -+ mlir::omp::ParallelClauseOps parallelClauseOps; -+ mlir::omp::NumThreadsClauseOps numThreadsClauseOps; -+ llvm::SmallVector reductionSyms; -+ llvm::SmallVector reductionTypes; -+ genParallelClauses(converter, semaCtx, stmtCtx, item->clauses, loc, -+ evalOutsideTarget, parallelClauseOps, numThreadsClauseOps, -+ reductionTypes, reductionSyms); -+ -+ genParallelOp(converter, symTable, semaCtx, eval, loc, queue, item, -+ parallelClauseOps, numThreadsClauseOps, reductionSyms, -+ reductionTypes, evalOutsideTarget ? targetOp : nullptr); -+} -+ -+static void genStandaloneSimd(lower::AbstractConverter &converter, -+ lower::SymMap &symTable, -+ semantics::SemanticsContext &semaCtx, -+ lower::pft::Evaluation &eval, mlir::Location loc, -+ const ConstructQueue &queue, -+ ConstructQueue::iterator item, -+ DataSharingProcessor &dsp) { -+ mlir::omp::SimdClauseOps simdClauseOps; -+ genSimdClauses(converter, semaCtx, item->clauses, loc, simdClauseOps); -+ -+ mlir::omp::LoopNestClauseOps loopNestClauseOps; -+ llvm::SmallVector iv; -+ genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, -+ loopNestClauseOps, iv); -+ -+ auto simdOp = -+ genSimdWrapperOp(converter, semaCtx, eval, loc, simdClauseOps, dsp); -+ -+ genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, -+ loopNestClauseOps, iv, -+ /*wrapperSyms=*/{}, simdOp.getRegion().getArguments(), -+ llvm::omp::Directive::OMPD_simd, dsp); -+} -+ -+static void genStandaloneTaskloop( -+ lower::AbstractConverter &converter, lower::SymMap &symTable, -+ semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, -+ mlir::Location loc, const ConstructQueue &queue, -+ ConstructQueue::iterator item, DataSharingProcessor &dsp) { -+ TODO(loc, "Taskloop construct"); - } + mlir::omp::LoopNestOperands loopNestClauseOps; + llvm::SmallVector iv; +@@ -2170,7 +2611,8 @@ + dsp.emplace(converter, semaCtx, item->clauses, eval, + lower::omp::isLastItemInQueue(item, queue), + /*useDelayedPrivatization=*/true, &symTable); +- dsp->processStep1(¶llelClauseOps); ++ dsp->processStep1(); ++ dsp->processStep2(¶llelClauseOps); + } - //===----------------------------------------------------------------------===// -@@ -1895,26 +2225,195 @@ - lower::AbstractConverter &converter, lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, - mlir::Location loc, const ConstructQueue &queue, -- ConstructQueue::iterator item) { -- TODO(loc, "Composite DISTRIBUTE PARALLEL DO"); -+ ConstructQueue::iterator item, DataSharingProcessor &dsp) { -+ lower::StatementContext stmtCtx; -+ -+ auto offloadModOp = -+ llvm::cast(*converter.getModuleOp()); -+ mlir::omp::TargetOp targetOp = -+ 
findParentTargetOp(converter.getFirOpBuilder()); -+ bool evalOutsideTarget = -+ targetOp && !offloadModOp.getIsTargetDevice() && !evalHasSiblings(eval); -+ -+ // Clause processing. -+ mlir::omp::DistributeClauseOps distributeClauseOps; -+ // TODO: Process DISTRIBUTE clauses -+ -+ mlir::omp::ParallelClauseOps parallelClauseOps; -+ mlir::omp::NumThreadsClauseOps numThreadsClauseOps; -+ llvm::SmallVector parallelReductionSyms; -+ llvm::SmallVector parallelReductionTypes; -+ genParallelClauses(converter, semaCtx, stmtCtx, item->clauses, loc, -+ /*evalOutsideTarget=*/evalOutsideTarget, parallelClauseOps, -+ numThreadsClauseOps, parallelReductionTypes, -+ parallelReductionSyms); -+ -+ const auto &privateClauseOps = dsp.getPrivateClauseOps(); -+ parallelClauseOps.privateVars = privateClauseOps.privateVars; -+ parallelClauseOps.privatizers = privateClauseOps.privatizers; -+ -+ mlir::omp::WsloopClauseOps wsloopClauseOps; -+ llvm::SmallVector wsloopReductionSyms; -+ llvm::SmallVector wsloopReductionTypes; -+ genWsloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc, -+ wsloopClauseOps, wsloopReductionTypes, wsloopReductionSyms); -+ -+ mlir::omp::LoopNestClauseOps loopNestClauseOps; -+ llvm::SmallVector iv; -+ genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, -+ loopNestClauseOps, iv); -+ -+ // Operation creation. -+ auto distributeOp = genDistributeWrapperOp(converter, semaCtx, eval, loc, -+ distributeClauseOps, dsp); -+ -+ auto parallelOp = genParallelWrapperOp( -+ converter, semaCtx, eval, loc, parallelClauseOps, numThreadsClauseOps, -+ parallelReductionSyms, parallelReductionTypes, -+ evalOutsideTarget ? targetOp : nullptr, dsp); -+ -+ auto wsloopOp = -+ genWsloopWrapperOp(converter, semaCtx, eval, loc, wsloopClauseOps, -+ wsloopReductionSyms, wsloopReductionTypes, dsp); -+ -+ // Construct wrapper entry block list and associated symbols. It is important -+ // that the symbol order and the block argument order match, so that the -+ // symbol-value bindings created are correct. -+ auto wrapperSyms = -+ llvm::to_vector(llvm::concat( -+ parallelReductionSyms, dsp.getDelayedPrivSyms(), -+ wsloopReductionSyms)); -+ -+ auto wrapperArgs = llvm::to_vector( -+ llvm::concat(distributeOp.getRegion().getArguments(), -+ parallelOp.getRegion().getArguments(), -+ wsloopOp.getRegion().getArguments())); -+ -+ assert(wrapperSyms.size() == wrapperArgs.size() && -+ "Number of symbols and wrapper block arguments must match"); -+ genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, -+ loopNestClauseOps, iv, wrapperSyms, wrapperArgs, -+ llvm::omp::Directive::OMPD_distribute_parallel_do, dsp); + EntryBlockArgs parallelArgs; +@@ -2181,7 +2623,8 @@ + parallelArgs.reduction.vars = parallelClauseOps.reductionVars; + genParallelOp(converter, symTable, semaCtx, eval, loc, queue, item, + parallelClauseOps, parallelArgs, +- enableDelayedPrivatization ? &dsp.value() : nullptr); ++ enableDelayedPrivatization ? 
&dsp.value() : nullptr, ++ /*isComposite=*/false); } - static void genCompositeDistributeParallelDoSimd( - lower::AbstractConverter &converter, lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, - mlir::Location loc, const ConstructQueue &queue, -- ConstructQueue::iterator item) { -- TODO(loc, "Composite DISTRIBUTE PARALLEL DO SIMD"); -+ ConstructQueue::iterator item, DataSharingProcessor &dsp) { -+ lower::StatementContext stmtCtx; -+ -+ auto offloadModOp = -+ llvm::cast(*converter.getModuleOp()); -+ mlir::omp::TargetOp targetOp = -+ findParentTargetOp(converter.getFirOpBuilder()); -+ bool evalOutsideTarget = -+ targetOp && !offloadModOp.getIsTargetDevice() && !evalHasSiblings(eval); -+ -+ // Clause processing. -+ mlir::omp::DistributeClauseOps distributeClauseOps; -+ // TODO: Process DISTRIBUTE clauses -+ -+ mlir::omp::ParallelClauseOps parallelClauseOps; -+ mlir::omp::NumThreadsClauseOps numThreadsClauseOps; -+ llvm::SmallVector parallelReductionSyms; -+ llvm::SmallVector parallelReductionTypes; -+ genParallelClauses(converter, semaCtx, stmtCtx, item->clauses, loc, -+ /*evalOutsideTarget=*/evalOutsideTarget, parallelClauseOps, -+ numThreadsClauseOps, parallelReductionTypes, -+ parallelReductionSyms); -+ -+ const auto &privateClauseOps = dsp.getPrivateClauseOps(); -+ parallelClauseOps.privateVars = privateClauseOps.privateVars; -+ parallelClauseOps.privatizers = privateClauseOps.privatizers; -+ -+ mlir::omp::WsloopClauseOps wsloopClauseOps; -+ llvm::SmallVector wsloopReductionSyms; -+ llvm::SmallVector wsloopReductionTypes; -+ genWsloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc, -+ wsloopClauseOps, wsloopReductionTypes, wsloopReductionSyms); -+ -+ mlir::omp::SimdClauseOps simdClauseOps; -+ genSimdClauses(converter, semaCtx, item->clauses, loc, simdClauseOps); -+ -+ mlir::omp::LoopNestClauseOps loopNestClauseOps; -+ llvm::SmallVector iv; -+ genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, -+ loopNestClauseOps, iv); -+ -+ // Operation creation. -+ auto distributeOp = genDistributeWrapperOp(converter, semaCtx, eval, loc, -+ distributeClauseOps, dsp); -+ -+ auto parallelOp = genParallelWrapperOp( -+ converter, semaCtx, eval, loc, parallelClauseOps, numThreadsClauseOps, -+ parallelReductionSyms, parallelReductionTypes, -+ evalOutsideTarget ? targetOp : nullptr, dsp); -+ -+ auto wsloopOp = -+ genWsloopWrapperOp(converter, semaCtx, eval, loc, wsloopClauseOps, -+ wsloopReductionSyms, wsloopReductionTypes, dsp); -+ -+ auto simdOp = -+ genSimdWrapperOp(converter, semaCtx, eval, loc, simdClauseOps, dsp); -+ -+ // Construct wrapper entry block list and associated symbols. It is important -+ // that the symbol order and the block argument order match, so that the -+ // symbol-value bindings created are correct. 
-+ auto wrapperSyms = -+ llvm::to_vector(llvm::concat( -+ parallelReductionSyms, dsp.getDelayedPrivSyms(), -+ wsloopReductionSyms)); -+ -+ auto wrapperArgs = llvm::to_vector(llvm::concat( -+ distributeOp.getRegion().getArguments(), -+ parallelOp.getRegion().getArguments(), -+ wsloopOp.getRegion().getArguments(), simdOp.getRegion().getArguments())); -+ -+ assert(wrapperSyms.size() == wrapperArgs.size() && -+ "Number of symbols and wrapper block arguments must match"); -+ genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, -+ loopNestClauseOps, iv, wrapperSyms, wrapperArgs, -+ llvm::omp::Directive::OMPD_distribute_parallel_do_simd, dsp); - } + static void genStandaloneSimd(lower::AbstractConverter &converter, +@@ -2200,6 +2643,7 @@ + /*shouldCollectPreDeterminedSymbols=*/true, + /*useDelayedPrivatization=*/false, &symTable); + dsp.processStep1(); ++ dsp.processStep2(); --static void genCompositeDistributeSimd(lower::AbstractConverter &converter, -- lower::SymMap &symTable, -- semantics::SemanticsContext &semaCtx, -- lower::pft::Evaluation &eval, -- mlir::Location loc, -- const ConstructQueue &queue, -- ConstructQueue::iterator item) { -- TODO(loc, "Composite DISTRIBUTE SIMD"); -+static void genCompositeDistributeSimd( -+ lower::AbstractConverter &converter, lower::SymMap &symTable, -+ semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, -+ mlir::Location loc, const ConstructQueue &queue, -+ ConstructQueue::iterator item, DataSharingProcessor &dsp) { -+ // Clause processing. -+ mlir::omp::DistributeClauseOps distributeClauseOps; -+ // TODO: Process DISTRIBUTE clauses -+ -+ mlir::omp::SimdClauseOps simdClauseOps; -+ genSimdClauses(converter, semaCtx, item->clauses, loc, simdClauseOps); -+ -+ mlir::omp::LoopNestClauseOps loopNestClauseOps; -+ llvm::SmallVector iv; -+ genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, -+ loopNestClauseOps, iv); -+ -+ // Operation creation. -+ auto distributeOp = genDistributeWrapperOp(converter, semaCtx, eval, loc, -+ distributeClauseOps, dsp); -+ -+ auto simdOp = -+ genSimdWrapperOp(converter, semaCtx, eval, loc, simdClauseOps, dsp); -+ -+ // Construct wrapper entry block list and associated symbols. It is important -+ // that the symbol order and the block argument order match, so that the -+ // symbol-value bindings created are correct. 
-+ auto wrapperArgs = llvm::to_vector( -+ llvm::concat(distributeOp.getRegion().getArguments(), -+ simdOp.getRegion().getArguments())); -+ -+ assert(wrapperArgs.empty() && -+ "Block args for omp.simd and omp.distribute currently not expected"); -+ genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, -+ loopNestClauseOps, iv, -+ /*wrapperSyms=*/{}, wrapperArgs, -+ llvm::omp::Directive::OMPD_distribute_simd, dsp); - } + mlir::omp::LoopNestOperands loopNestClauseOps; + llvm::SmallVector iv; +@@ -2253,7 +2697,8 @@ + DataSharingProcessor dsp(converter, semaCtx, doItem->clauses, eval, + /*shouldCollectPreDeterminedSymbols=*/true, + /*useDelayedPrivatization=*/true, &symTable); +- dsp.processStep1(¶llelClauseOps); ++ dsp.processStep1(); ++ dsp.processStep2(¶llelClauseOps); - static void genCompositeDoSimd(lower::AbstractConverter &converter, -@@ -1922,29 +2421,51 @@ - semantics::SemanticsContext &semaCtx, - lower::pft::Evaluation &eval, mlir::Location loc, - const ConstructQueue &queue, -- ConstructQueue::iterator item) { -- ClauseProcessor cp(converter, semaCtx, item->clauses); -- cp.processTODO( -- loc, llvm::omp::OMPD_do_simd); -- // TODO: Add support for vectorization - add vectorization hints inside loop -- // body. -- // OpenMP standard does not specify the length of vector instructions. -- // Currently we safely assume that for !$omp do simd pragma the SIMD length -- // is equal to 1 (i.e. we generate standard workshare loop). -- // When support for vectorization is enabled, then we need to add handling of -- // if clause. Currently if clause can be skipped because we always assume -- // SIMD length = 1. -- genWsloopOp(converter, symTable, semaCtx, eval, loc, queue, item); --} -- --static void genCompositeTaskloopSimd(lower::AbstractConverter &converter, -- lower::SymMap &symTable, -- semantics::SemanticsContext &semaCtx, -- lower::pft::Evaluation &eval, -- mlir::Location loc, -- const ConstructQueue &queue, -- ConstructQueue::iterator item) { -+ ConstructQueue::iterator item, -+ DataSharingProcessor &dsp) { -+ lower::StatementContext stmtCtx; -+ -+ // Clause processing. -+ mlir::omp::WsloopClauseOps wsloopClauseOps; -+ llvm::SmallVector wsloopReductionSyms; -+ llvm::SmallVector wsloopReductionTypes; -+ genWsloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc, -+ wsloopClauseOps, wsloopReductionTypes, wsloopReductionSyms); -+ -+ mlir::omp::SimdClauseOps simdClauseOps; -+ genSimdClauses(converter, semaCtx, item->clauses, loc, simdClauseOps); -+ -+ mlir::omp::LoopNestClauseOps loopNestClauseOps; -+ llvm::SmallVector iv; -+ genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, -+ loopNestClauseOps, iv); -+ -+ // Operation creation. -+ auto wsloopOp = -+ genWsloopWrapperOp(converter, semaCtx, eval, loc, wsloopClauseOps, -+ wsloopReductionSyms, wsloopReductionTypes, dsp); -+ -+ auto simdOp = -+ genSimdWrapperOp(converter, semaCtx, eval, loc, simdClauseOps, dsp); -+ -+ // Construct wrapper entry block list and associated symbols. It is important -+ // that the symbol order and the block argument order match, so that the -+ // symbol-value bindings created are correct. 
-+ auto wrapperArgs = llvm::to_vector(llvm::concat( -+ wsloopOp.getRegion().getArguments(), simdOp.getRegion().getArguments())); -+ -+ assert(wsloopReductionSyms.size() == wrapperArgs.size() && -+ "Number of symbols and wrapper block arguments must match"); -+ genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, -+ loopNestClauseOps, iv, wsloopReductionSyms, wrapperArgs, -+ llvm::omp::Directive::OMPD_do_simd, dsp); -+} -+ -+static void genCompositeTaskloopSimd( -+ lower::AbstractConverter &converter, lower::SymMap &symTable, -+ semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, -+ mlir::Location loc, const ConstructQueue &queue, -+ ConstructQueue::iterator item, DataSharingProcessor &dsp) { - TODO(loc, "Composite TASKLOOP SIMD"); - } + EntryBlockArgs parallelArgs; + parallelArgs.priv.syms = dsp.getDelayedPrivSymbols(); +@@ -2321,7 +2766,8 @@ + DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval, + /*shouldCollectPreDeterminedSymbols=*/true, + /*useDelayedPrivatization=*/true, &symTable); +- dsp.processStep1(¶llelClauseOps); ++ dsp.processStep1(); ++ dsp.processStep2(¶llelClauseOps); -@@ -1957,18 +2478,33 @@ - semantics::SemanticsContext &semaCtx, - lower::pft::Evaluation &eval, mlir::Location loc, - const ConstructQueue &queue, -- ConstructQueue::iterator item) { -+ ConstructQueue::iterator item, -+ DataSharingProcessor *dsp) { - assert(item != queue.end()); -+ bool firstLoopLeaf = !dsp && llvm::omp::getDirectiveAssociation(item->id) == -+ llvm::omp::Association::Loop; -+ -+ std::optional loopDsp; -+ if (firstLoopLeaf) { -+ symTable.pushScope(); -+ loopDsp.emplace(converter, semaCtx, item->clauses, eval, -+ /*shouldCollectPreDeterminedSymbols=*/true, -+ enableDelayedPrivatization, &symTable); -+ dsp = &*loopDsp; -+ dsp->processStep1(); -+ dsp->processStep2(); -+ } + EntryBlockArgs parallelArgs; + parallelArgs.priv.syms = dsp.getDelayedPrivSymbols(); +@@ -2410,6 +2856,7 @@ + /*shouldCollectPreDeterminedSymbols=*/true, + /*useDelayedPrivatization=*/false, &symTable); + dsp.processStep1(); ++ dsp.processStep2(); - switch (llvm::omp::Directive dir = item->id) { - case llvm::omp::Directive::OMPD_barrier: - genBarrierOp(converter, symTable, semaCtx, eval, loc, queue, item); - break; - case llvm::omp::Directive::OMPD_distribute: -- genDistributeOp(converter, symTable, semaCtx, eval, loc, queue, item); -+ genStandaloneDistribute(converter, symTable, semaCtx, eval, loc, queue, -+ item, *dsp); - break; - case llvm::omp::Directive::OMPD_do: -- genWsloopOp(converter, symTable, semaCtx, eval, loc, queue, item); -+ genStandaloneDo(converter, symTable, semaCtx, eval, loc, queue, item, *dsp); - break; - case llvm::omp::Directive::OMPD_loop: - case llvm::omp::Directive::OMPD_masked: -@@ -1982,8 +2518,7 @@ - genOrderedRegionOp(converter, symTable, semaCtx, eval, loc, queue, item); - break; - case llvm::omp::Directive::OMPD_parallel: -- genParallelOp(converter, symTable, semaCtx, eval, loc, queue, item, -- /*outerCombined=*/false); -+ genStandaloneParallel(converter, symTable, semaCtx, eval, loc, queue, item); - break; - case llvm::omp::Directive::OMPD_section: - genSectionOp(converter, symTable, semaCtx, eval, loc, queue, item); -@@ -1992,14 +2527,14 @@ - genSectionsOp(converter, symTable, semaCtx, eval, loc, queue, item); - break; - case llvm::omp::Directive::OMPD_simd: -- genSimdOp(converter, symTable, semaCtx, eval, loc, queue, item); -+ genStandaloneSimd(converter, symTable, semaCtx, eval, loc, queue, item, -+ *dsp); - break; - case 
llvm::omp::Directive::OMPD_single: - genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item); - break; - case llvm::omp::Directive::OMPD_target: -- genTargetOp(converter, symTable, semaCtx, eval, loc, queue, item, -- /*outerCombined=*/false); -+ genTargetOp(converter, symTable, semaCtx, eval, loc, queue, item); - break; - case llvm::omp::Directive::OMPD_target_data: - genTargetDataOp(converter, symTable, semaCtx, eval, loc, queue, item); -@@ -2023,7 +2558,8 @@ - genTaskgroupOp(converter, symTable, semaCtx, eval, loc, queue, item); - break; - case llvm::omp::Directive::OMPD_taskloop: -- genTaskloopOp(converter, symTable, semaCtx, eval, loc, queue, item); -+ genStandaloneTaskloop(converter, symTable, semaCtx, eval, loc, queue, item, -+ *dsp); - break; - case llvm::omp::Directive::OMPD_taskwait: - genTaskwaitOp(converter, symTable, semaCtx, eval, loc, queue, item); -@@ -2049,26 +2585,30 @@ - // Composite constructs - case llvm::omp::Directive::OMPD_distribute_parallel_do: - genCompositeDistributeParallelDo(converter, symTable, semaCtx, eval, loc, -- queue, item); -+ queue, item, *dsp); - break; - case llvm::omp::Directive::OMPD_distribute_parallel_do_simd: - genCompositeDistributeParallelDoSimd(converter, symTable, semaCtx, eval, -- loc, queue, item); -+ loc, queue, item, *dsp); - break; - case llvm::omp::Directive::OMPD_distribute_simd: - genCompositeDistributeSimd(converter, symTable, semaCtx, eval, loc, queue, -- item); -+ item, *dsp); - break; - case llvm::omp::Directive::OMPD_do_simd: -- genCompositeDoSimd(converter, symTable, semaCtx, eval, loc, queue, item); -+ genCompositeDoSimd(converter, symTable, semaCtx, eval, loc, queue, item, -+ *dsp); - break; - case llvm::omp::Directive::OMPD_taskloop_simd: - genCompositeTaskloopSimd(converter, symTable, semaCtx, eval, loc, queue, -- item); -+ item, *dsp); - break; - default: - break; - } -+ -+ if (firstLoopLeaf) -+ symTable.popScope(); - } + // Pass the innermost leaf construct's clauses because that's where COLLAPSE + // is placed by construct decomposition. +@@ -2467,6 +2914,7 @@ + /*shouldCollectPreDeterminedSymbols=*/true, + /*useDelayedPrivatization=*/false, &symTable); + dsp.processStep1(); ++ dsp.processStep2(); - //===----------------------------------------------------------------------===// -@@ -2389,6 +2929,7 @@ + // Pass the innermost leaf construct's clauses because that's where COLLAPSE + // is placed by construct decomposition. 
+@@ -3019,6 +3467,7 @@ ConstructQueue queue{ buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx, eval, source, directive, clauses)}; @@ -6323,47 +6221,17 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/OpenMP.cp genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue, queue.begin()); } -@@ -2416,6 +2957,7 @@ - std::get(sectionsConstruct.t); +@@ -3044,6 +3493,7 @@ + std::get(sectionsConstruct.t); clauses.append(makeClauses( std::get(endSectionsDirective.t), semaCtx)); + mlir::Location currentLocation = converter.getCurrentLocation(); llvm::omp::Directive directive = -@@ -2433,9 +2975,8 @@ - semantics::SemanticsContext &semaCtx, - lower::pft::Evaluation &eval, - const parser::OpenMPConstruct &ompConstruct) { -- std::visit( -- [&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); }, -- ompConstruct.u); -+ std::visit([&](auto &&s) { genOMP(converter, symTable, semaCtx, eval, s); }, -+ ompConstruct.u); - } - - //===----------------------------------------------------------------------===// -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/ReductionProcessor.cpp llvm-project/flang/lib/Lower/OpenMP/ReductionProcessor.cpp ---- llvm-project.orig/flang/lib/Lower/OpenMP/ReductionProcessor.cpp 2024-06-12 10:43:12.624210453 -0500 -+++ llvm-project/flang/lib/Lower/OpenMP/ReductionProcessor.cpp 2024-06-12 10:44:09.351614239 -0500 -@@ -70,6 +70,14 @@ - } - } - -+void ReductionProcessor::addReductionSym( -+ const omp::clause::Reduction &reduction, -+ llvm::SmallVectorImpl &symbols) { -+ const auto &objectList{std::get(reduction.t)}; -+ llvm::transform(objectList, std::back_inserter(symbols), -+ [](const Object &object) { return object.sym(); }); -+} -+ - bool ReductionProcessor::supportedIntrinsicProcReduction( - const omp::clause::ProcedureDesignator &pd) { - semantics::Symbol *sym = pd.v.sym(); -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/ReductionProcessor.h llvm-project/flang/lib/Lower/OpenMP/ReductionProcessor.h ---- llvm-project.orig/flang/lib/Lower/OpenMP/ReductionProcessor.h 2024-06-12 10:43:12.624210453 -0500 -+++ llvm-project/flang/lib/Lower/OpenMP/ReductionProcessor.h 2024-06-12 10:44:09.351614239 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Lower/OpenMP/ReductionProcessor.h llvm-project-aso/flang/lib/Lower/OpenMP/ReductionProcessor.h +--- llvm-project-aso-orig/flang/lib/Lower/OpenMP/ReductionProcessor.h 2024-10-18 17:40:32.496992373 -0500 ++++ llvm-project-aso/flang/lib/Lower/OpenMP/ReductionProcessor.h 2024-11-23 20:39:47.180175366 -0600 @@ -13,10 +13,9 @@ #ifndef FORTRAN_LOWER_REDUCTIONPROCESSOR_H #define FORTRAN_LOWER_REDUCTIONPROCESSOR_H @@ -6376,21 +6244,10 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Reduction #include "flang/Semantics/symbol.h" #include "flang/Semantics/type.h" #include "mlir/IR/Location.h" -@@ -108,6 +107,10 @@ - mlir::Type type, mlir::Value op1, - mlir::Value op2); - -+ static void addReductionSym( -+ const omp::clause::Reduction &reduction, -+ llvm::SmallVectorImpl &symbols); -+ - /// Creates an OpenMP reduction declaration and inserts it into the provided - /// symbol table. The declaration has a constant initializer with the neutral - /// value `initValue`, and the reduction combiner carried over from `reduce`. 
-diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Utils.cpp llvm-project/flang/lib/Lower/OpenMP/Utils.cpp ---- llvm-project.orig/flang/lib/Lower/OpenMP/Utils.cpp 2024-06-12 10:43:12.624210453 -0500 -+++ llvm-project/flang/lib/Lower/OpenMP/Utils.cpp 2024-06-12 10:44:09.351614239 -0500 -@@ -10,17 +10,18 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Lower/OpenMP/Utils.cpp llvm-project-aso/flang/lib/Lower/OpenMP/Utils.cpp +--- llvm-project-aso-orig/flang/lib/Lower/OpenMP/Utils.cpp 2024-11-23 20:25:26.839275178 -0600 ++++ llvm-project-aso/flang/lib/Lower/OpenMP/Utils.cpp 2024-11-23 20:39:47.180175366 -0600 +@@ -10,20 +10,26 @@ // //===----------------------------------------------------------------------===// @@ -6398,132 +6255,31 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Utils.cpp +#include -#include "Clauses.h" + #include + ++#include #include ++#include #include +#include #include ++#include ++#include #include + #include #include #include #include #include ++#include +#include - #include - #include -@@ -349,6 +350,108 @@ - return sym; - } + #include -+mlir::omp::MapInfoOp -+createMapInfoOp(mlir::OpBuilder &builder, mlir::Location loc, -+ mlir::Value baseAddr, mlir::Value varPtrPtr, std::string name, -+ llvm::ArrayRef bounds, -+ llvm::ArrayRef members, -+ mlir::DenseIntElementsAttr membersIndex, uint64_t mapType, -+ mlir::omp::VariableCaptureKind mapCaptureType, mlir::Type retTy, -+ bool partialMap) { -+ if (auto boxTy = llvm::dyn_cast(baseAddr.getType())) { -+ baseAddr = builder.create(loc, baseAddr); -+ retTy = baseAddr.getType(); -+ } -+ -+ mlir::TypeAttr varType = mlir::TypeAttr::get( -+ llvm::cast(retTy).getElementType()); -+ -+ mlir::omp::MapInfoOp op = builder.create( -+ loc, retTy, baseAddr, varType, varPtrPtr, members, membersIndex, bounds, -+ builder.getIntegerAttr(builder.getIntegerType(64, false), mapType), -+ builder.getAttr(mapCaptureType), -+ builder.getStringAttr(name), builder.getBoolAttr(partialMap)); -+ -+ return op; -+} -+ -+mlir::Value calculateTripCount(fir::FirOpBuilder &builder, mlir::Location loc, -+ const mlir::omp::CollapseClauseOps &ops) { -+ using namespace mlir::arith; -+ assert(ops.loopLBVar.size() == ops.loopUBVar.size() && -+ ops.loopLBVar.size() == ops.loopStepVar.size() && -+ !ops.loopLBVar.empty() && "Invalid bounds or step"); -+ -+ // Get the bit width of an integer-like type. -+ auto widthOf = [](mlir::Type ty) -> unsigned { -+ if (mlir::isa(ty)) { -+ return mlir::IndexType::kInternalStorageBitWidth; -+ } -+ if (auto tyInt = mlir::dyn_cast(ty)) { -+ return tyInt.getWidth(); -+ } -+ llvm_unreachable("Unexpected type"); -+ }; -+ -+ // For a type that is either IntegerType or IndexType, return the -+ // equivalent IntegerType. In the former case this is a no-op. -+ auto asIntTy = [&](mlir::Type ty) -> mlir::IntegerType { -+ if (ty.isIndex()) { -+ return mlir::IntegerType::get(ty.getContext(), widthOf(ty)); -+ } -+ assert(ty.isIntOrIndex() && "Unexpected type"); -+ return mlir::cast(ty); -+ }; -+ -+ // For two given values, establish a common signless IntegerType -+ // that can represent any value of type of x and of type of y, -+ // and return the pair of x, y converted to the new type. 
-+ auto unifyToSignless = -+ [&](fir::FirOpBuilder &b, mlir::Value x, -+ mlir::Value y) -> std::pair { -+ auto tyX = asIntTy(x.getType()), tyY = asIntTy(y.getType()); -+ unsigned width = std::max(widthOf(tyX), widthOf(tyY)); -+ auto wideTy = mlir::IntegerType::get(b.getContext(), width, -+ mlir::IntegerType::Signless); -+ return std::make_pair(b.createConvert(loc, wideTy, x), -+ b.createConvert(loc, wideTy, y)); -+ }; -+ -+ // Start with signless i32 by default. -+ auto tripCount = builder.createIntegerConstant(loc, builder.getI32Type(), 1); -+ -+ for (auto [origLb, origUb, origStep] : -+ llvm::zip(ops.loopLBVar, ops.loopUBVar, ops.loopStepVar)) { -+ auto tmpS0 = builder.createIntegerConstant(loc, origStep.getType(), 0); -+ auto [step, step0] = unifyToSignless(builder, origStep, tmpS0); -+ auto reverseCond = -+ builder.create(loc, CmpIPredicate::slt, step, step0); -+ auto negStep = builder.create(loc, step0, step); -+ mlir::Value absStep = -+ builder.create(loc, reverseCond, negStep, step); -+ -+ auto [lb, ub] = unifyToSignless(builder, origLb, origUb); -+ auto start = builder.create(loc, reverseCond, ub, lb); -+ auto end = builder.create(loc, reverseCond, lb, ub); -+ -+ mlir::Value range = builder.create(loc, end, start); -+ auto rangeCond = -+ builder.create(loc, CmpIPredicate::slt, end, start); -+ std::tie(range, absStep) = unifyToSignless(builder, range, absStep); -+ // numSteps = (range /u absStep) + 1 -+ auto numSteps = builder.create( -+ loc, builder.create(loc, range, absStep), -+ builder.createIntegerConstant(loc, range.getType(), 1)); -+ -+ auto trip0 = builder.createIntegerConstant(loc, numSteps.getType(), 0); -+ auto loopTripCount = -+ builder.create(loc, rangeCond, trip0, numSteps); -+ auto [totalTC, thisTC] = unifyToSignless(builder, tripCount, loopTripCount); -+ tripCount = builder.create(loc, totalTC, thisTC); -+ } -+ -+ return tripCount; -+} - } // namespace omp - } // namespace lower - } // namespace Fortran -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Utils.h llvm-project/flang/lib/Lower/OpenMP/Utils.h ---- llvm-project.orig/flang/lib/Lower/OpenMP/Utils.h 2024-06-12 10:43:12.624210453 -0500 -+++ llvm-project/flang/lib/Lower/OpenMP/Utils.h 1969-12-31 18:00:00.000000000 -0600 -@@ -1,107 +0,0 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Lower/OpenMP/Utils.h llvm-project-aso/flang/lib/Lower/OpenMP/Utils.h +--- llvm-project-aso-orig/flang/lib/Lower/OpenMP/Utils.h 2024-11-23 20:25:26.839275178 -0600 ++++ llvm-project-aso/flang/lib/Lower/OpenMP/Utils.h 1969-12-31 18:00:00.000000000 -0600 +@@ -1,169 +0,0 @@ -//===-- Lower/OpenMP/Utils.h ------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. @@ -6540,6 +6296,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Utils.h l -#include "mlir/IR/Location.h" -#include "mlir/IR/Value.h" -#include "llvm/Support/CommandLine.h" +-#include - -extern llvm::cl::opt treatIndexAsSection; -extern llvm::cl::opt enableDelayedPrivatization; @@ -6560,6 +6317,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Utils.h l -} // namespace parser - -namespace lower { +-class StatementContext; -namespace pft { -struct Evaluation; -} @@ -6575,38 +6333,97 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Utils.h l -// and index data when lowering OpenMP map clauses. 
Keeps track of the -// placement of the component in the derived type hierarchy it rests within, -// alongside the generated mlir::omp::MapInfoOp for the mapped component. --struct OmpMapMemberIndicesData { +-// +-// As an example of what the contents of this data structure may be like, +-// when provided the following derived type and map of that type: +-// +-// type :: bottom_layer +-// real(8) :: i2 +-// real(4) :: array_i2(10) +-// real(4) :: array_j2(10) +-// end type bottom_layer +-// +-// type :: top_layer +-// real(4) :: i +-// integer(4) :: array_i(10) +-// real(4) :: j +-// type(bottom_layer) :: nested +-// integer, allocatable :: array_j(:) +-// integer(4) :: k +-// end type top_layer +-// +-// type(top_layer) :: top_dtype +-// +-// map(tofrom: top_dtype%nested%i2, top_dtype%k, top_dtype%nested%array_i2) +-// +-// We would end up with an OmpMapParentAndMemberData populated like below: +-// +-// memberPlacementIndices: +-// Vector 1: 3, 0 +-// Vector 2: 5 +-// Vector 3: 3, 1 +-// +-// memberMap: +-// Entry 1: omp.map.info for "top_dtype%nested%i2" +-// Entry 2: omp.map.info for "top_dtype%k" +-// Entry 3: omp.map.info for "top_dtype%nested%array_i2" +-// +-// And this OmpMapParentAndMemberData would be accessed via the parent +-// symbol for top_dtype. Other parent derived type instances that have +-// members mapped would have there own OmpMapParentAndMemberData entry +-// accessed via their own symbol. +-struct OmpMapParentAndMemberData { - // The indices representing the component members placement in its derived - // type parents hierarchy. -- llvm::SmallVector memberPlacementIndices; +- llvm::SmallVector> memberPlacementIndices; - - // Placement of the member in the member vector. -- mlir::omp::MapInfoOp memberMap; +- llvm::SmallVector memberMap; +- +- bool isDuplicateMemberMapInfo(llvm::SmallVectorImpl &memberIndices) { +- return llvm::find_if(memberPlacementIndices, [&](auto &memberData) { +- return llvm::equal(memberIndices, memberData); +- }) != memberPlacementIndices.end(); +- } +- +- void addChildIndexAndMapToParent(const omp::Object &object, +- mlir::omp::MapInfoOp &mapOp, +- semantics::SemanticsContext &semaCtx); -}; - -mlir::omp::MapInfoOp -createMapInfoOp(fir::FirOpBuilder &builder, mlir::Location loc, -- mlir::Value baseAddr, mlir::Value varPtrPtr, std::string name, -- mlir::ArrayRef bounds, -- mlir::ArrayRef members, -- mlir::DenseIntElementsAttr membersIndex, uint64_t mapType, +- mlir::Value baseAddr, mlir::Value varPtrPtr, +- llvm::StringRef name, llvm::ArrayRef bounds, +- llvm::ArrayRef members, +- mlir::ArrayAttr membersIndex, uint64_t mapType, - mlir::omp::VariableCaptureKind mapCaptureType, mlir::Type retTy, - bool partialMap = false); - --void addChildIndexAndMapToParent( -- const omp::Object &object, -- std::map> &parentMemberIndices, -- mlir::omp::MapInfoOp &mapOp, semantics::SemanticsContext &semaCtx); -- -void insertChildMapInfoIntoParent( -- lower::AbstractConverter &converter, -- std::map> &parentMemberIndices, +- Fortran::lower::AbstractConverter &converter, +- Fortran::semantics::SemanticsContext &semaCtx, +- Fortran::lower::StatementContext &stmtCtx, +- std::map &parentMemberIndices, - llvm::SmallVectorImpl &mapOperands, -- llvm::SmallVectorImpl &mapSyms, -- llvm::SmallVectorImpl *mapSymTypes, -- llvm::SmallVectorImpl *mapSymLocs); +- llvm::SmallVectorImpl &mapSyms); +- +-void generateMemberPlacementIndices( +- const Object &object, llvm::SmallVectorImpl &indices, +- Fortran::semantics::SemanticsContext &semaCtx); +- +-bool 
isMemberOrParentAllocatableOrPointer( +- const Object &object, Fortran::semantics::SemanticsContext &semaCtx); +- +-mlir::Value createParentSymAndGenIntermediateMaps( +- mlir::Location clauseLocation, Fortran::lower::AbstractConverter &converter, +- semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, +- omp::ObjectList &objectList, llvm::SmallVectorImpl &indices, +- OmpMapParentAndMemberData &parentMemberIndices, llvm::StringRef asFortran, +- llvm::omp::OpenMPOffloadMappingFlags mapTypeBits); +- +-omp::ObjectList gatherObjectsOf(omp::Object derivedTypeMember, +- semantics::SemanticsContext &semaCtx); - -mlir::Type getLoopVarType(lower::AbstractConverter &converter, - std::size_t loopVarTypeSize); @@ -6620,115 +6437,43 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Utils.h l - -int64_t getCollapseValue(const List &clauses); - --semantics::Symbol *getOmpObjectSymbol(const parser::OmpObject &ompObject); -- -void genObjectList(const ObjectList &objects, - lower::AbstractConverter &converter, - llvm::SmallVectorImpl &operands); - +-void lastprivateModifierNotSupported(const omp::clause::Lastprivate &lastp, +- mlir::Location loc); +- -} // namespace omp -} // namespace lower -} // namespace Fortran - -#endif // FORTRAN_LOWER_OPENMPUTILS_H -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Builder/FIRBuilder.cpp llvm-project/flang/lib/Optimizer/Builder/FIRBuilder.cpp ---- llvm-project.orig/flang/lib/Optimizer/Builder/FIRBuilder.cpp 2024-06-12 10:43:12.624210453 -0500 -+++ llvm-project/flang/lib/Optimizer/Builder/FIRBuilder.cpp 2024-06-12 10:44:09.351614239 -0500 -@@ -250,7 +250,37 @@ - if (auto ompOutlineableIface = - getRegion() - .getParentOfType()) { -- return ompOutlineableIface.getAllocaBlock(); -+ // omp.parallel can work as a block construct but it can also be a loop -+ // wrapper when part of a composite construct. Make sure it's only treated -+ // as a block if it's not a wrapper. -+ auto parallelOp = -+ llvm::dyn_cast(*ompOutlineableIface); -+ if (!parallelOp || !llvm::isa_and_present( -+ parallelOp->getParentOp())) -+ return ompOutlineableIface.getAllocaBlock(); -+ } -+ -+ // All allocations associated with an OpenMP loop wrapper must happen outside -+ // of all wrappers. -+ mlir::Operation *currentOp = getRegion().getParentOp(); -+ auto wrapperIface = -+ llvm::isa(currentOp) -+ ? llvm::cast( -+ currentOp->getParentOp()) -+ : llvm::dyn_cast(currentOp); -+ if (wrapperIface) { -+ // Cannot use LoopWrapperInterface methods here because the whole nest may -+ // not have been created at this point. Manually traverse parents instead. 
-+ mlir::omp::LoopWrapperInterface lastWrapperOp = wrapperIface; -+ while (true) { -+ if (auto nextWrapper = -+ llvm::dyn_cast_if_present( -+ lastWrapperOp->getParentOp())) -+ lastWrapperOp = nextWrapper; -+ else -+ break; -+ } -+ return &lastWrapperOp->getParentRegion()->front(); - } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Optimizer/OpenMP/CMakeLists.txt llvm-project-aso/flang/lib/Optimizer/OpenMP/CMakeLists.txt +--- llvm-project-aso-orig/flang/lib/Optimizer/OpenMP/CMakeLists.txt 2024-11-23 20:25:26.843275164 -0600 ++++ llvm-project-aso/flang/lib/Optimizer/OpenMP/CMakeLists.txt 2024-11-23 20:39:47.180175366 -0600 +@@ -1,7 +1,9 @@ + get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) - if (auto recipeIface = -@@ -267,9 +297,15 @@ - llvm::ArrayRef attrs) { - assert(!mlir::isa(type) && "cannot be a reference"); - // If the alloca is inside an OpenMP Op which will be outlined then pin -- // the alloca here. -- const bool pinned = -+ // the alloca here. Make sure that an omp.parallel operation that is taking -+ // a loop wrapper role is not detected as outlineable here. -+ auto iface = - getRegion().getParentOfType(); -+ auto parallelOp = -+ iface ? llvm::dyn_cast(*iface) : nullptr; -+ const bool pinned = -+ iface && (!parallelOp || !llvm::isa_and_present( -+ parallelOp->getParentOp())); - mlir::Value temp = - create(loc, type, /*unique_name=*/llvm::StringRef{}, name, - pinned, lenParams, shape, attrs); -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp llvm-project/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp ---- llvm-project.orig/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp 2024-06-12 10:43:12.628210411 -0500 -+++ llvm-project/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp 2024-06-12 10:44:09.351614239 -0500 -@@ -278,9 +278,16 @@ - // 3. The first ancestor that is an OpenMP Op or a LLVMFuncOp - mlir::Block * - ConvertFIRToLLVMPattern::getBlockForAllocaInsert(mlir::Operation *op) const { -- if (auto iface = mlir::dyn_cast(op)) -- return iface.getAllocaBlock(); -- if (auto llvmFuncOp = mlir::dyn_cast(op)) -+ if (auto iface = -+ mlir::dyn_cast(op)) { -+ // omp.parallel can work as a block construct but it can also be a loop -+ // wrapper when it's part of a composite construct. Make sure it's only -+ // treated as a block if it's not a wrapper. 
-+ auto parallelOp = llvm::dyn_cast(*iface); -+ if (!parallelOp || !llvm::isa_and_present( -+ parallelOp->getParentOp())) -+ return iface.getAllocaBlock(); -+ } else if (auto llvmFuncOp = mlir::dyn_cast(op)) - return &llvmFuncOp.front(); - - return getBlockForAllocaInsert(op->getParentOp()); -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/CMakeLists.txt llvm-project/flang/lib/Optimizer/Transforms/CMakeLists.txt ---- llvm-project.orig/flang/lib/Optimizer/Transforms/CMakeLists.txt 2024-06-12 10:43:12.632210369 -0500 -+++ llvm-project/flang/lib/Optimizer/Transforms/CMakeLists.txt 2024-06-12 10:44:09.351614239 -0500 -@@ -24,6 +24,7 @@ - VScaleAttr.cpp - FunctionAttr.cpp - DebugTypeGenerator.cpp + add_flang_library(FlangOpenMPTransforms + DoConcurrentConversion.cpp - - DEPENDS - FIRDialect -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp llvm-project/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp ---- llvm-project.orig/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp 1969-12-31 18:00:00.000000000 -0600 -+++ llvm-project/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp 2024-06-12 10:44:09.351614239 -0500 -@@ -0,0 +1,625 @@ + FunctionFiltering.cpp ++ GlobalFiltering.cpp + MapsForPrivatizedSymbols.cpp + MapInfoFinalization.cpp + MarkDeclareTarget.cpp +@@ -21,6 +23,7 @@ + FIRSupport + FortranCommon + MLIRFuncDialect ++ MLIRMathTransforms + MLIROpenMPDialect + HLFIRDialect + MLIRIR +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp llvm-project-aso/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp +--- llvm-project-aso-orig/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp 2024-11-23 20:39:47.180175366 -0600 +@@ -0,0 +1,1037 @@ +//===- DoConcurrentConversion.cpp -- map `DO CONCURRENT` to OpenMP loops --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
@@ -6745,8 +6490,11 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/D +#include "flang/Optimizer/Dialect/Support/FIRContext.h" +#include "flang/Optimizer/HLFIR/HLFIRDialect.h" +#include "flang/Optimizer/HLFIR/HLFIROps.h" -+#include "flang/Optimizer/Transforms/Passes.h" ++#include "flang/Optimizer/OpenMP/Passes.h" ++#include "mlir/Analysis/SliceAnalysis.h" ++#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" ++#include "mlir/Dialect/Math/IR/Math.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "mlir/IR/Diagnostics.h" +#include "mlir/IR/IRMapping.h" @@ -6755,15 +6503,17 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/D +#include "mlir/Transforms/RegionUtils.h" +#include "llvm/Frontend/OpenMP/OMPConstants.h" + ++#include +#include +#include + -+namespace fir { ++namespace flangomp { +#define GEN_PASS_DEF_DOCONCURRENTCONVERSIONPASS -+#include "flang/Optimizer/Transforms/Passes.h.inc" -+} // namespace fir ++#include "flang/Optimizer/OpenMP/Passes.h.inc" ++} // namespace flangomp + -+#define DEBUG_TYPE "fopenmp-do-concurrent-conversion" ++#define DEBUG_TYPE "do-concurrent-conversion" ++#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE << "]: ") + +namespace Fortran { +namespace lower { @@ -6772,14 +6522,12 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/D +// TODO The following 2 functions are copied from "flang/Lower/OpenMP/Utils.h". +// This duplication is temporary until we find a solution for a shared location +// for these utils that does not introduce circular CMake deps. -+mlir::omp::MapInfoOp -+createMapInfoOp(mlir::OpBuilder &builder, mlir::Location loc, -+ mlir::Value baseAddr, mlir::Value varPtrPtr, std::string name, -+ llvm::ArrayRef bounds, -+ llvm::ArrayRef members, -+ mlir::DenseIntElementsAttr membersIndex, uint64_t mapType, -+ mlir::omp::VariableCaptureKind mapCaptureType, mlir::Type retTy, -+ bool partialMap = false) { ++mlir::omp::MapInfoOp createMapInfoOp( ++ mlir::OpBuilder &builder, mlir::Location loc, mlir::Value baseAddr, ++ mlir::Value varPtrPtr, std::string name, llvm::ArrayRef bounds, ++ llvm::ArrayRef members, mlir::ArrayAttr membersIndex, ++ uint64_t mapType, mlir::omp::VariableCaptureKind mapCaptureType, ++ mlir::Type retTy, bool partialMap = false) { + if (auto boxTy = llvm::dyn_cast(baseAddr.getType())) { + baseAddr = builder.create(loc, baseAddr); + retTy = baseAddr.getType(); @@ -6788,6 +6536,13 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/D + mlir::TypeAttr varType = mlir::TypeAttr::get( + llvm::cast(retTy).getElementType()); + ++ // For types with unknown extents such as <2x?xi32> we discard the incomplete ++ // type info and only retain the base type. The correct dimensions are later ++ // recovered through the bounds info. 
++ if (auto seqType = llvm::dyn_cast(varType.getValue())) ++ if (seqType.hasDynamicExtents()) ++ varType = mlir::TypeAttr::get(seqType.getEleTy()); ++ + mlir::omp::MapInfoOp op = builder.create( + loc, retTy, baseAddr, varType, varPtrPtr, members, membersIndex, bounds, + builder.getIntegerAttr(builder.getIntegerType(64, false), mapType), @@ -6797,95 +6552,478 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/D + return op; +} + -+mlir::Value calculateTripCount(fir::FirOpBuilder &builder, mlir::Location loc, -+ const mlir::omp::CollapseClauseOps &ops) { -+ using namespace mlir::arith; -+ assert(ops.loopLBVar.size() == ops.loopUBVar.size() && -+ ops.loopLBVar.size() == ops.loopStepVar.size() && -+ !ops.loopLBVar.empty() && "Invalid bounds or step"); -+ -+ // Get the bit width of an integer-like type. -+ auto widthOf = [](mlir::Type ty) -> unsigned { -+ if (mlir::isa(ty)) { -+ return mlir::IndexType::kInternalStorageBitWidth; -+ } -+ if (auto tyInt = mlir::dyn_cast(ty)) { -+ return tyInt.getWidth(); ++/// Check if cloning the bounds introduced any dependency on the outer region. ++/// If so, then either clone them as well if they are MemoryEffectFree, or else ++/// copy them to a new temporary and add them to the map and block_argument ++/// lists and replace their uses with the new temporary. ++/// ++/// TODO: similar to the above functions, this is copied from OpenMP lowering ++/// (in this case, from `genBodyOfTargetOp`). Once we move to a common lib for ++/// these utils this will move as well. ++void cloneOrMapRegionOutsiders(fir::FirOpBuilder &builder, ++ mlir::omp::TargetOp targetOp) { ++ mlir::Region &targetRegion = targetOp.getRegion(); ++ mlir::Block *targetEntryBlock = &targetRegion.getBlocks().front(); ++ llvm::SetVector valuesDefinedAbove; ++ mlir::getUsedValuesDefinedAbove(targetRegion, valuesDefinedAbove); ++ ++ while (!valuesDefinedAbove.empty()) { ++ for (mlir::Value val : valuesDefinedAbove) { ++ mlir::Operation *valOp = val.getDefiningOp(); ++ assert(valOp != nullptr); ++ if (mlir::isMemoryEffectFree(valOp)) { ++ mlir::Operation *clonedOp = valOp->clone(); ++ targetEntryBlock->push_front(clonedOp); ++ assert(clonedOp->getNumResults() == 1); ++ val.replaceUsesWithIf( ++ clonedOp->getResult(0), [targetEntryBlock](mlir::OpOperand &use) { ++ return use.getOwner()->getBlock() == targetEntryBlock; ++ }); ++ } else { ++ mlir::OpBuilder::InsertionGuard guard(builder); ++ builder.setInsertionPointAfter(valOp); ++ auto copyVal = builder.createTemporary(val.getLoc(), val.getType()); ++ builder.createStoreWithConvert(copyVal.getLoc(), val, copyVal); ++ ++ llvm::SmallVector bounds; ++ std::stringstream name; ++ builder.setInsertionPoint(targetOp); ++ mlir::Value mapOp = createMapInfoOp( ++ builder, copyVal.getLoc(), copyVal, ++ /*varPtrPtr=*/mlir::Value{}, name.str(), bounds, ++ /*members=*/llvm::SmallVector{}, ++ /*membersIndex=*/mlir::ArrayAttr{}, ++ static_cast< ++ std::underlying_type_t>( ++ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT), ++ mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType()); ++ targetOp.getMapVarsMutable().append(mapOp); ++ mlir::Value clonedValArg = ++ targetRegion.addArgument(copyVal.getType(), copyVal.getLoc()); ++ builder.setInsertionPointToStart(targetEntryBlock); ++ auto loadOp = ++ builder.create(clonedValArg.getLoc(), clonedValArg); ++ val.replaceUsesWithIf( ++ loadOp->getResult(0), [targetEntryBlock](mlir::OpOperand &use) { ++ return use.getOwner()->getBlock() == targetEntryBlock; ++ }); ++ } + } -+ 
llvm_unreachable("Unexpected type"); -+ }; ++ valuesDefinedAbove.clear(); ++ mlir::getUsedValuesDefinedAbove(targetRegion, valuesDefinedAbove); ++ } ++} ++} // namespace internal ++} // namespace omp ++} // namespace lower ++} // namespace Fortran ++ ++namespace { ++namespace looputils { ++/// Stores info needed about the induction/iteration variable for each `do ++/// concurrent` in a loop nest. This includes: ++/// * the operation allocating memory for iteration variable, ++/// * the operation(s) updating the iteration variable with the current ++/// iteration number. ++struct InductionVariableInfo { ++ mlir::Operation *iterVarMemDef; ++ llvm::SetVector indVarUpdateOps; ++}; ++ ++using LoopNestToIndVarMap = ++ llvm::MapVector; ++ ++/// Given an operation `op`, this returns true if `op`'s operand is ultimately ++/// the loop's induction variable. Detecting this helps finding the live-in ++/// value corresponding to the induction variable in case the induction variable ++/// is indirectly used in the loop (e.g. throught a cast op). ++bool isIndVarUltimateOperand(mlir::Operation *op, fir::DoLoopOp doLoop) { ++ while (op != nullptr && op->getNumOperands() > 0) { ++ auto ivIt = llvm::find_if(op->getOperands(), [&](mlir::Value operand) { ++ return operand == doLoop.getInductionVar(); ++ }); ++ ++ if (ivIt != op->getOperands().end()) ++ return true; ++ ++ op = op->getOperand(0).getDefiningOp(); ++ } ++ ++ return false; ++} ++ ++/// For the \p doLoop parameter, find the operations that declares its induction ++/// variable or allocates memory for it. ++mlir::Operation *findLoopIndVarMemDecl(fir::DoLoopOp doLoop) { ++ mlir::Value result = nullptr; ++ mlir::visitUsedValuesDefinedAbove( ++ doLoop.getRegion(), [&](mlir::OpOperand *operand) { ++ if (isIndVarUltimateOperand(operand->getOwner(), doLoop)) { ++ assert(result == nullptr && ++ "loop can have only one induction variable"); ++ result = operand->get(); ++ } ++ }); ++ ++ assert(result != nullptr && result.getDefiningOp() != nullptr); ++ return result.getDefiningOp(); ++} ++ ++/// Collect the list of values used inside the loop but defined outside of it. ++void collectLoopLiveIns(fir::DoLoopOp doLoop, ++ llvm::SmallVectorImpl &liveIns) { ++ llvm::SmallDenseSet seenValues; ++ llvm::SmallDenseSet seenOps; ++ ++ mlir::visitUsedValuesDefinedAbove( ++ doLoop.getRegion(), [&](mlir::OpOperand *operand) { ++ if (!seenValues.insert(operand->get()).second) ++ return; ++ ++ mlir::Operation *definingOp = operand->get().getDefiningOp(); ++ // We want to collect ops corresponding to live-ins only once. ++ if (definingOp && !seenOps.insert(definingOp).second) ++ return; ++ ++ liveIns.push_back(operand->get()); ++ }); ++} ++ ++/// Collects the op(s) responsible for updating a loop's iteration variable with ++/// the current iteration number. For example, for the input IR: ++/// ``` ++/// %i = fir.alloca i32 {bindc_name = "i"} ++/// %i_decl:2 = hlfir.declare %i ... ++/// ... ++/// fir.do_loop %i_iv = %lb to %ub step %step unordered { ++/// %1 = fir.convert %i_iv : (index) -> i32 ++/// fir.store %1 to %i_decl#1 : !fir.ref ++/// ... ++/// } ++/// ``` ++/// this function would return the first 2 ops in the `fir.do_loop`'s region. 
++llvm::SetVector
++extractIndVarUpdateOps(fir::DoLoopOp doLoop) {
++  mlir::Value indVar = doLoop.getInductionVar();
++  llvm::SetVector indVarUpdateOps;
++
++  llvm::SmallVector toProcess;
++  toProcess.push_back(indVar);
++
++  llvm::DenseSet done;
++
++  while (!toProcess.empty()) {
++    mlir::Value val = toProcess.back();
++    toProcess.pop_back();
++
++    if (!done.insert(val).second)
++      continue;
++
++    for (mlir::Operation *user : val.getUsers()) {
++      indVarUpdateOps.insert(user);
++
++      for (mlir::Value result : user->getResults())
++        toProcess.push_back(result);
++    }
++  }
++
++  return std::move(indVarUpdateOps);
++}
++
++/// Starting with a value at the end of a definition/conversion chain, walk the
++/// chain backwards and collect all the visited ops along the way. This is the
++/// same as the "backward slice" of the use-def chain of \p link.
++///
++/// If the root of the chain/slice is a constant op (where convert operations
++/// on constants count as constants as well), then populate \p opChain with the
++/// extracted chain/slice. If not, then \p opChain will contain a single value:
++/// \p link.
++///
++/// The purpose of this function is to pull the chain of
++/// constant+conversion ops inside the parallel region if possible, which
++/// prevents creating an unnecessary shared/mapped value that crosses the
++/// OpenMP region.
++///
++/// For example, given this IR:
++/// ```
++/// %c10 = arith.constant 10 : i32
++/// %10 = fir.convert %c10 : (i32) -> index
++/// ```
++/// and given `%10` as the starting input `link`, `opChain` would contain
++/// both of the above ops.
++void collectIndirectConstOpChain(mlir::Operation *link,
++                                 llvm::SetVector &opChain) {
++  mlir::BackwardSliceOptions options;
++  options.inclusive = true;
++  mlir::getBackwardSlice(link, &opChain, options);
++
++  assert(!opChain.empty());
++
++  bool isConstantChain = [&]() {
++    if (!mlir::isa_and_present(opChain.front()))
++      return false;
++
++    return llvm::all_of(llvm::drop_begin(opChain), [](mlir::Operation *op) {
++      return mlir::isa_and_present(op);
++    });
++  }();
++
++  if (isConstantChain)
++    return;
++
++  opChain.clear();
++  opChain.insert(link);
++}
++
++/// Loop \p innerLoop is considered perfectly-nested inside \p outerLoop iff
++/// there are no operations in \p outerLoop's body other than:
++///
++/// 1. the operations needed to assign/update \p outerLoop's induction variable.
++/// 2. \p innerLoop itself.
++///
++/// \return true if \p innerLoop is perfectly nested inside \p outerLoop
++/// according to the above definition.
++bool isPerfectlyNested(fir::DoLoopOp outerLoop, fir::DoLoopOp innerLoop) {
++  mlir::BackwardSliceOptions backwardSliceOptions;
++  backwardSliceOptions.inclusive = true;
++  // We will collect the backward slices for innerLoop's LB, UB, and step.
++  // However, we want to limit the scope of these slices to the scope of
++  // outerLoop's region. 
++ backwardSliceOptions.filter = [&](mlir::Operation *op) { ++ return !mlir::areValuesDefinedAbove(op->getResults(), ++ outerLoop.getRegion()); + }; + -+ // For two given values, establish a common signless IntegerType -+ // that can represent any value of type of x and of type of y, -+ // and return the pair of x, y converted to the new type. -+ auto unifyToSignless = -+ [&](fir::FirOpBuilder &b, mlir::Value x, -+ mlir::Value y) -> std::pair { -+ auto tyX = asIntTy(x.getType()), tyY = asIntTy(y.getType()); -+ unsigned width = std::max(widthOf(tyX), widthOf(tyY)); -+ auto wideTy = mlir::IntegerType::get(b.getContext(), width, -+ mlir::IntegerType::Signless); -+ return std::make_pair(b.createConvert(loc, wideTy, x), -+ b.createConvert(loc, wideTy, y)); ++ mlir::ForwardSliceOptions forwardSliceOptions; ++ forwardSliceOptions.inclusive = true; ++ // We don't care about the outer-loop's induction variable's uses within the ++ // inner-loop, so we filter out these uses. ++ forwardSliceOptions.filter = [&](mlir::Operation *op) { ++ return mlir::areValuesDefinedAbove(op->getResults(), innerLoop.getRegion()); + }; + -+ // Start with signless i32 by default. -+ auto tripCount = builder.createIntegerConstant(loc, builder.getI32Type(), 1); -+ -+ for (auto [origLb, origUb, origStep] : -+ llvm::zip(ops.loopLBVar, ops.loopUBVar, ops.loopStepVar)) { -+ auto tmpS0 = builder.createIntegerConstant(loc, origStep.getType(), 0); -+ auto [step, step0] = unifyToSignless(builder, origStep, tmpS0); -+ auto reverseCond = -+ builder.create(loc, CmpIPredicate::slt, step, step0); -+ auto negStep = builder.create(loc, step0, step); -+ mlir::Value absStep = -+ builder.create(loc, reverseCond, negStep, step); -+ -+ auto [lb, ub] = unifyToSignless(builder, origLb, origUb); -+ auto start = builder.create(loc, reverseCond, ub, lb); -+ auto end = builder.create(loc, reverseCond, lb, ub); -+ -+ mlir::Value range = builder.create(loc, end, start); -+ auto rangeCond = -+ builder.create(loc, CmpIPredicate::slt, end, start); -+ std::tie(range, absStep) = unifyToSignless(builder, range, absStep); -+ // numSteps = (range /u absStep) + 1 -+ auto numSteps = builder.create( -+ loc, builder.create(loc, range, absStep), -+ builder.createIntegerConstant(loc, range.getType(), 1)); -+ -+ auto trip0 = builder.createIntegerConstant(loc, numSteps.getType(), 0); -+ auto loopTripCount = -+ builder.create(loc, rangeCond, trip0, numSteps); -+ auto [totalTC, thisTC] = unifyToSignless(builder, tripCount, loopTripCount); -+ tripCount = builder.create(loc, totalTC, thisTC); ++ llvm::SetVector indVarSlice; ++ mlir::getForwardSlice(outerLoop.getInductionVar(), &indVarSlice, ++ forwardSliceOptions); ++ llvm::DenseSet innerLoopSetupOpsSet(indVarSlice.begin(), ++ indVarSlice.end()); ++ ++ llvm::DenseSet loopBodySet; ++ outerLoop.walk([&](mlir::Operation *op) { ++ if (op == outerLoop) ++ return mlir::WalkResult::advance(); ++ ++ if (op == innerLoop) ++ return mlir::WalkResult::skip(); ++ ++ if (mlir::isa(op)) ++ return mlir::WalkResult::advance(); ++ ++ loopBodySet.insert(op); ++ return mlir::WalkResult::advance(); ++ }); ++ ++ bool result = (loopBodySet == innerLoopSetupOpsSet); ++ mlir::Location loc = outerLoop.getLoc(); ++ LLVM_DEBUG(DBGS() << "Loop pair starting at location " << loc << " is" ++ << (result ? "" : " not") << " perfectly nested\n"); ++ ++ return result; ++} ++ ++/// Starting with `outerLoop` collect a perfectly nested loop nest, if any. 
This
++/// function collects as many loops in the nest as possible; in case it fails
++/// to recognize a certain nested loop as part of the nest, it just returns
++/// the parent loops it discovered before.
++mlir::LogicalResult collectLoopNest(fir::DoLoopOp currentLoop,
++                                    LoopNestToIndVarMap &loopNest) {
++  assert(currentLoop.getUnordered());
++
++  while (true) {
++    loopNest.try_emplace(
++        currentLoop,
++        InductionVariableInfo{
++            findLoopIndVarMemDecl(currentLoop),
++            std::move(looputils::extractIndVarUpdateOps(currentLoop))});
++
++    auto directlyNestedLoops = currentLoop.getRegion().getOps();
++    llvm::SmallVector unorderedLoops;
++
++    for (auto nestedLoop : directlyNestedLoops)
++      if (nestedLoop.getUnordered())
++        unorderedLoops.push_back(nestedLoop);
++
++    if (unorderedLoops.empty())
++      break;
++
++    if (unorderedLoops.size() > 1)
++      return mlir::failure();
++
++    fir::DoLoopOp nestedUnorderedLoop = unorderedLoops.front();
++
++    if (!isPerfectlyNested(currentLoop, nestedUnorderedLoop))
++      return mlir::failure();
++
++    currentLoop = nestedUnorderedLoop;
++  }
++
++  return mlir::success();
++}
++
++/// Prepares the `fir.do_loop` nest to be easily mapped to OpenMP. In
++/// particular, this function would take this input IR:
++/// ```
++/// fir.do_loop %i_iv = %i_lb to %i_ub step %i_step unordered {
++///   fir.store %i_iv to %i#1 : !fir.ref
++///   %j_lb = arith.constant 1 : i32
++///   %j_ub = arith.constant 10 : i32
++///   %j_step = arith.constant 1 : index
++///
++///   fir.do_loop %j_iv = %j_lb to %j_ub step %j_step unordered {
++///     fir.store %j_iv to %j#1 : !fir.ref
++///     ...
++///   }
++/// }
++/// ```
++///
++/// into the following form (using the generic op form since the result is
++/// technically an invalid `fir.do_loop` op):
++///
++/// ```
++/// "fir.do_loop"(%i_lb, %i_ub, %i_step) <{unordered}> ({
++///   ^bb0(%i_iv: index):
++///     %j_lb = "arith.constant"() <{value = 1 : i32}> : () -> i32
++///     %j_ub = "arith.constant"() <{value = 10 : i32}> : () -> i32
++///     %j_step = "arith.constant"() <{value = 1 : index}> : () -> index
++///
++///     "fir.do_loop"(%j_lb, %j_ub, %j_step) <{unordered}> ({
++///       ^bb0(%new_i_iv: index, %new_j_iv: index):
++///         "fir.store"(%new_i_iv, %i#1) : (i32, !fir.ref) -> ()
++///         "fir.store"(%new_j_iv, %j#1) : (i32, !fir.ref) -> ()
++///         ...
++///     })
++/// ```
++///
++/// What happened to the loop nest is the following:
++///
++/// * the innermost loop's entry block was updated from having one operand to
++///   having `n` operands where `n` is the number of loops in the nest,
++///
++/// * the outer loop(s)' ops that update the IVs were sunk into the innermost
++///   loop (see the `"fir.store"(%new_i_iv, %i#1)` op above),
++///
++/// * the innermost loop's entry block's arguments were mapped in order from the
++///   outermost to the innermost IV.
++///
++/// With this IR change, we can directly inline the innermost loop's region into
++/// the newly generated `omp.loop_nest` op.
++///
++/// Note that this function has a pre-condition that \p loopNest consists of
++/// perfectly nested loops; i.e. there are no in-between ops between 2 nested
++/// loops except for the ops to set up the inner loop's LB, UB, and step. These
++/// ops are handled/cloned by `genLoopNestClauseOps(..)`. 
++void sinkLoopIVArgs(mlir::ConversionPatternRewriter &rewriter,
++                    looputils::LoopNestToIndVarMap &loopNest) {
++  if (loopNest.size() <= 1)
++    return;
++
++  fir::DoLoopOp innermostLoop = loopNest.back().first;
++  mlir::Operation &innermostFirstOp = innermostLoop.getRegion().front().front();
++
++  llvm::SmallVector argTypes;
++  llvm::SmallVector argLocs;
++
++  for (auto &[doLoop, indVarInfo] : llvm::drop_end(loopNest)) {
++    // Sink the IV update ops to the innermost loop. We need to do this for all
++    // loops except for the innermost one, hence the `drop_end` usage above.
++    for (mlir::Operation *op : indVarInfo.indVarUpdateOps)
++      op->moveBefore(&innermostFirstOp);
++
++    argTypes.push_back(doLoop.getInductionVar().getType());
++    argLocs.push_back(doLoop.getInductionVar().getLoc());
++  }
++
++  mlir::Region &innermostRegion = innermostLoop.getRegion();
++  // Extend the innermost entry block with arguments to represent the outer IVs.
++  innermostRegion.addArguments(argTypes, argLocs);
++
++  unsigned idx = 1;
++  // In reverse, remap the IVs of the loop nest from the old values to the new
++  // ones. We do that in reverse since the first argument before this loop is
++  // the old IV for the innermost loop. Therefore, we want to replace it first
++  // before the old value (1st argument in the block) is remapped to be the IV
++  // of the outermost loop in the nest.
++  for (auto &[doLoop, _] : llvm::reverse(loopNest)) {
++    doLoop.getInductionVar().replaceAllUsesWith(
++        innermostRegion.getArgument(innermostRegion.getNumArguments() - idx));
++    ++idx;
++  }
++}
++
++/// Collects values that are local to a loop: "loop-local values". A loop-local
++/// value is one that is used exclusively inside the loop but allocated outside
++/// of it. This usually corresponds to temporary values that are used inside the
++/// loop body for initializing other variables, for example.
++///
++/// \param [in] doLoop - the loop within which the function searches for values
++/// used exclusively inside.
++///
++/// \param [out] locals - the list of loop-local values detected for \p doLoop.
++static void collectLoopLocalValues(fir::DoLoopOp doLoop,
++                                   llvm::SetVector &locals) {
++  doLoop.walk([&](mlir::Operation *op) {
++    for (mlir::Value operand : op->getOperands()) {
++      if (locals.contains(operand))
++        continue;
++
++      bool isLocal = true;
++
++      if (!mlir::isa_and_present(operand.getDefiningOp()))
++        continue;
++
++      // Values defined inside the loop are not interesting since they do not
++      // need to be localized.
++      if (doLoop->isAncestor(operand.getDefiningOp()))
++        continue;
++
++      for (auto *user : operand.getUsers()) {
++        if (!doLoop->isAncestor(user)) {
++          isLocal = false;
++          break;
++        }
++      }
++
++      if (isLocal)
++        locals.insert(operand);
++    }
++  });
++}
++
++/// For a "loop-local" value \p local within a loop's scope, localizes that
++/// value within the scope of the parallel region the loop maps to. Towards that
++/// end, this function moves the allocation of \p local within \p allocRegion.
++///
++/// \param local - the value used exclusively within a loop's scope (see
++/// collectLoopLocalValues).
++///
++/// \param allocRegion - the parallel region where \p local's allocation will be
++/// privatized.
++///
++/// \param rewriter - builder used for updating \p allocRegion. 
++static void localizeLoopLocalValue(mlir::Value local, mlir::Region &allocRegion, ++ mlir::ConversionPatternRewriter &rewriter) { ++ rewriter.moveOpBefore(local.getDefiningOp(), &allocRegion.front().front()); ++} ++} // namespace looputils ++ +class DoConcurrentConversion : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + -+ DoConcurrentConversion(mlir::MLIRContext *context, bool mapToDevice) -+ : OpConversionPattern(context), mapToDevice(mapToDevice) {} ++ DoConcurrentConversion(mlir::MLIRContext *context, bool mapToDevice, ++ llvm::DenseSet &concurrentLoopsToSkip) ++ : OpConversionPattern(context), mapToDevice(mapToDevice), ++ concurrentLoopsToSkip(concurrentLoopsToSkip) {} + + mlir::LogicalResult + matchAndRewrite(fir::DoLoopOp doLoop, OpAdaptor adaptor, @@ -6900,116 +7038,99 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/D + "defining operation."); + } + -+ std::function isOpUltimatelyConstant = -+ [&](mlir::Operation *operation) { -+ if (mlir::isa_and_present(operation)) -+ return true; ++ looputils::LoopNestToIndVarMap loopNest; ++ bool hasRemainingNestedLoops = ++ failed(looputils::collectLoopNest(doLoop, loopNest)); ++ if (hasRemainingNestedLoops) ++ mlir::emitWarning(doLoop.getLoc(), ++ "Some `do concurent` loops are not perfectly-nested. " ++ "These will be serialzied."); ++ ++ llvm::SmallVector loopNestLiveIns; ++ looputils::collectLoopLiveIns(loopNest.back().first, loopNestLiveIns); ++ assert(!loopNestLiveIns.empty()); ++ ++ llvm::SetVector locals; ++ looputils::collectLoopLocalValues(loopNest.back().first, locals); ++ // We do not want to map "loop-local" values to the device through ++ // `omp.map.info` ops. Therefore, we remove them from the list of live-ins. ++ loopNestLiveIns.erase(llvm::remove_if(loopNestLiveIns, ++ [&](mlir::Value liveIn) { ++ return locals.contains(liveIn); ++ }), ++ loopNestLiveIns.end()); ++ ++ looputils::sinkLoopIVArgs(rewriter, loopNest); + -+ if (auto convertOp = -+ mlir::dyn_cast_if_present(operation)) -+ return isOpUltimatelyConstant(convertOp.getValue().getDefiningOp()); -+ -+ return false; -+ }; -+ -+ if (!isOpUltimatelyConstant(lbOp) || !isOpUltimatelyConstant(ubOp) || -+ !isOpUltimatelyConstant(stepOp)) { -+ return rewriter.notifyMatchFailure( -+ doLoop, "`do concurrent` conversion is currently only supported for " -+ "constant LB, UB, and step values."); -+ } -+ -+ llvm::SmallVector liveIns; -+ collectLoopLiveIns(doLoop, liveIns); -+ assert(!liveIns.empty()); ++ mlir::omp::TargetOp targetOp; ++ mlir::omp::LoopNestOperands loopNestClauseOps; + + mlir::IRMapping mapper; -+ mlir::omp::TargetOp targetOp; -+ mlir::omp::LoopNestClauseOps loopNestClauseOps; + + if (mapToDevice) { -+ mlir::omp::TargetClauseOps clauseOps; -+ for (mlir::Value liveIn : liveIns) -+ clauseOps.mapVars.push_back(genMapInfoOpForLiveIn(rewriter, liveIn)); -+ targetOp = -+ genTargetOp(doLoop.getLoc(), rewriter, mapper, liveIns, clauseOps); -+ genTeamsOp(doLoop.getLoc(), rewriter, doLoop, liveIns, mapper, -+ loopNestClauseOps); -+ genDistributeOp(doLoop.getLoc(), rewriter); -+ } ++ // TODO: Currently the loop bounds for the outer loop are duplicated. ++ mlir::omp::TargetOperands targetClauseOps; ++ genLoopNestClauseOps(doLoop.getLoc(), rewriter, loopNest, mapper, ++ loopNestClauseOps, &targetClauseOps); ++ ++ // Prevent mapping host-evaluated variables. 
++ loopNestLiveIns.erase( ++ llvm::remove_if(loopNestLiveIns, ++ [&](mlir::Value liveIn) { ++ return llvm::is_contained( ++ targetClauseOps.hostEvalVars, liveIn); ++ }), ++ loopNestLiveIns.end()); ++ ++ // The outermost loop will contain all the live-in values in all nested ++ // loops since live-in values are collected recursively for all nested ++ // ops. ++ for (mlir::Value liveIn : loopNestLiveIns) ++ targetClauseOps.mapVars.push_back( ++ genMapInfoOpForLiveIn(rewriter, liveIn)); + -+ genParallelOp(doLoop.getLoc(), rewriter, doLoop, liveIns, mapper, -+ loopNestClauseOps); -+ genWsLoopOp(rewriter, doLoop, mapper, loopNestClauseOps); ++ targetOp = ++ genTargetOp(doLoop.getLoc(), rewriter, mapper, loopNestLiveIns, ++ targetClauseOps, loopNestClauseOps); + -+ // Now that we created the nested `ws.loop` op, we set can the `target` op's -+ // trip count. -+ if (mapToDevice) { -+ rewriter.setInsertionPoint(targetOp); -+ auto parentModule = doLoop->getParentOfType(); -+ fir::FirOpBuilder firBuilder(rewriter, fir::getKindMapping(parentModule)); -+ -+ mlir::omp::CollapseClauseOps collapseClauseOps; -+ collapseClauseOps.loopLBVar.push_back(lbOp->getResult(0)); -+ collapseClauseOps.loopUBVar.push_back(ubOp->getResult(0)); -+ collapseClauseOps.loopStepVar.push_back(stepOp->getResult(0)); -+ -+ mlir::cast(targetOp).getTripCountMutable().assign( -+ Fortran::lower::omp::internal::calculateTripCount( -+ firBuilder, doLoop.getLoc(), collapseClauseOps)); ++ genTeamsOp(doLoop.getLoc(), rewriter); + } + -+ rewriter.eraseOp(doLoop); -+ return mlir::success(); -+ } -+ -+private: -+ /// Collect the list of values used inside the loop but defined outside of it. -+ /// The first item in the returned list is always the loop's induction -+ /// variable. -+ void collectLoopLiveIns(fir::DoLoopOp doLoop, -+ llvm::SmallVectorImpl &liveIns) const { -+ // Given an operation `op`, this lambda returns true if `op`'s operand is -+ // ultimately the loop's induction variable. Detecting this helps finding -+ // the live-in value corresponding to the induction variable in case the -+ // induction variable is indirectly used in the loop (e.g. throught a cast -+ // op). -+ std::function isIndVarUltimateOperand = -+ [&](mlir::Operation *op) { -+ if (auto storeOp = mlir::dyn_cast_if_present(op)) { -+ return (storeOp.getValue() == doLoop.getInductionVar()) || -+ isIndVarUltimateOperand(storeOp.getValue().getDefiningOp()); -+ } ++ mlir::omp::ParallelOp parallelOp = ++ genParallelOp(doLoop.getLoc(), rewriter, loopNest, mapper); ++ // Only set as composite when part of `distribute parallel do`. 
++ parallelOp.setComposite(mapToDevice); + -+ if (auto convertOp = mlir::dyn_cast_if_present(op)) { -+ return convertOp.getOperand() == doLoop.getInductionVar() || -+ isIndVarUltimateOperand( -+ convertOp.getValue().getDefiningOp()); -+ } ++ if (!mapToDevice) ++ genLoopNestClauseOps(doLoop.getLoc(), rewriter, loopNest, mapper, ++ loopNestClauseOps); + -+ return false; -+ }; ++ for (mlir::Value local : locals) ++ looputils::localizeLoopLocalValue(local, parallelOp.getRegion(), ++ rewriter); + -+ llvm::SmallDenseSet seenValues; -+ llvm::SmallDenseSet seenOps; ++ if (mapToDevice) ++ genDistributeOp(doLoop.getLoc(), rewriter).setComposite(/*val=*/true); + -+ mlir::visitUsedValuesDefinedAbove( -+ doLoop.getRegion(), [&](mlir::OpOperand *operand) { -+ if (!seenValues.insert(operand->get()).second) -+ return; ++ mlir::omp::LoopNestOp ompLoopNest = ++ genWsLoopOp(rewriter, loopNest.back().first, mapper, loopNestClauseOps, ++ /*isComposite=*/mapToDevice); + -+ mlir::Operation *definingOp = operand->get().getDefiningOp(); -+ // We want to collect ops corresponding to live-ins only once. -+ if (definingOp && !seenOps.insert(definingOp).second) -+ return; ++ rewriter.eraseOp(doLoop); + -+ liveIns.push_back(operand->get()); ++ // Mark `unordered` loops that are not perfectly nested to be skipped from ++ // the legality check of the `ConversionTarget` since we are not interested ++ // in mapping them to OpenMP. ++ ompLoopNest->walk([&](fir::DoLoopOp doLoop) { ++ if (doLoop.getUnordered()) { ++ concurrentLoopsToSkip.insert(doLoop); ++ } ++ }); + -+ if (isIndVarUltimateOperand(operand->getOwner())) -+ std::swap(*liveIns.begin(), *liveIns.rbegin()); -+ }); ++ return mlir::success(); + } + ++private: + void genBoundsOps(mlir::ConversionPatternRewriter &rewriter, + mlir::Location loc, hlfir::DeclareOp declareOp, + llvm::SmallVectorImpl &boundsOps) const { @@ -7073,42 +7194,78 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/D + llvm::SmallVector boundsOps; + genBoundsOps(rewriter, liveIn.getLoc(), declareOp, boundsOps); + -+ return Fortran::lower::omp ::internal::createMapInfoOp( -+ rewriter, liveIn.getLoc(), declareOp.getBase(), /*varPtrPtr=*/{}, -+ declareOp.getUniqName().str(), boundsOps, /*members=*/{}, -+ /*membersIndex=*/mlir::DenseIntElementsAttr{}, ++ // Use the raw address to avoid unboxing `fir.box` values whenever possible. ++ // Put differently, if we have access to the direct value memory ++ // reference/address, we use it. 
++ mlir::Value rawAddr = declareOp.getOriginalBase(); ++ return Fortran::lower::omp::internal::createMapInfoOp( ++ rewriter, liveIn.getLoc(), rawAddr, ++ /*varPtrPtr=*/{}, declareOp.getUniqName().str(), boundsOps, ++ /*members=*/{}, ++ /*membersIndex=*/mlir::ArrayAttr{}, + static_cast< + std::underlying_type_t>( + mapFlag), -+ captureKind, liveInType); ++ captureKind, rawAddr.getType()); + } + -+ mlir::omp::TargetOp genTargetOp(mlir::Location loc, -+ mlir::ConversionPatternRewriter &rewriter, -+ mlir::IRMapping &mapper, -+ llvm::ArrayRef liveIns, -+ mlir::omp::TargetClauseOps &clauseOps) const { ++ mlir::omp::TargetOp ++ genTargetOp(mlir::Location loc, mlir::ConversionPatternRewriter &rewriter, ++ mlir::IRMapping &mapper, llvm::ArrayRef mappedVars, ++ mlir::omp::TargetOperands &clauseOps, ++ mlir::omp::LoopNestOperands &loopNestClauseOps) const { + auto targetOp = rewriter.create(loc, clauseOps); ++ auto argIface = llvm::cast(*targetOp); + + mlir::Region ®ion = targetOp.getRegion(); + -+ llvm::SmallVector liveInTypes; -+ llvm::SmallVector liveInLocs; ++ llvm::SmallVector regionArgTypes; ++ llvm::SmallVector regionArgLocs; + -+ for (mlir::Value liveIn : liveIns) { -+ liveInTypes.push_back(liveIn.getType()); -+ liveInLocs.push_back(liveIn.getLoc()); ++ for (auto var : ++ llvm::concat(clauseOps.hostEvalVars, mappedVars)) { ++ regionArgTypes.push_back(var.getType()); ++ regionArgLocs.push_back(var.getLoc()); + } + -+ rewriter.createBlock(®ion, {}, liveInTypes, liveInLocs); ++ rewriter.createBlock(®ion, {}, regionArgTypes, regionArgLocs); + + for (auto [arg, mapInfoOp] : -+ llvm::zip_equal(region.getArguments(), clauseOps.mapVars)) { ++ llvm::zip_equal(argIface.getMapBlockArgs(), clauseOps.mapVars)) { + auto miOp = mlir::cast(mapInfoOp.getDefiningOp()); + hlfir::DeclareOp liveInDeclare = genLiveInDeclare(rewriter, arg, miOp); -+ mapper.map(miOp.getVariableOperand(0), liveInDeclare.getBase()); ++ mlir::Value miOperand = miOp.getVariableOperand(0); ++ ++ // TODO If `miOperand.getDefiningOp()` is a `fir::BoxAddrOp`, we probably ++ // need to "unpack" the box by getting the defining op of it's value. ++ // However, we did not hit this case in reality yet so leaving it as a ++ // todo for now. 
++ ++ mapper.map(miOperand, liveInDeclare.getOriginalBase()); ++ ++ if (auto origDeclareOp = mlir::dyn_cast_if_present( ++ miOperand.getDefiningOp())) ++ mapper.map(origDeclareOp.getBase(), liveInDeclare.getBase()); ++ } ++ ++ for (auto [arg, hostEval] : llvm::zip_equal(argIface.getHostEvalBlockArgs(), ++ clauseOps.hostEvalVars)) ++ mapper.map(hostEval, arg); ++ ++ for (unsigned i = 0; i < loopNestClauseOps.loopLowerBounds.size(); ++i) { ++ loopNestClauseOps.loopLowerBounds[i] = ++ mapper.lookup(loopNestClauseOps.loopLowerBounds[i]); ++ loopNestClauseOps.loopUpperBounds[i] = ++ mapper.lookup(loopNestClauseOps.loopUpperBounds[i]); ++ loopNestClauseOps.loopSteps[i] = ++ mapper.lookup(loopNestClauseOps.loopSteps[i]); + } + ++ fir::FirOpBuilder firBuilder( ++ rewriter, ++ fir::getKindMapping(targetOp->getParentOfType())); ++ Fortran::lower::omp::internal::cloneOrMapRegionOutsiders(firBuilder, ++ targetOp); + rewriter.setInsertionPoint( + rewriter.create(targetOp.getLoc())); + @@ -7158,28 +7315,23 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/D + } + + mlir::omp::TeamsOp -+ genTeamsOp(mlir::Location loc, mlir::ConversionPatternRewriter &rewriter, -+ fir::DoLoopOp doLoop, llvm::ArrayRef liveIns, -+ mlir::IRMapping &mapper, -+ mlir::omp::LoopNestClauseOps &loopNestClauseOps) const { ++ genTeamsOp(mlir::Location loc, ++ mlir::ConversionPatternRewriter &rewriter) const { + auto teamsOp = rewriter.create( -+ loc, /*clauses=*/mlir::omp::TeamsClauseOps{}); ++ loc, /*clauses=*/mlir::omp::TeamsOperands{}); + + rewriter.createBlock(&teamsOp.getRegion()); + rewriter.setInsertionPoint(rewriter.create(loc)); + -+ genInductionVariableAlloc(rewriter, liveIns, mapper); -+ genLoopNestClauseOps(loc, rewriter, doLoop, mapper, loopNestClauseOps); -+ + return teamsOp; + } + -+ void -+ genLoopNestClauseOps(mlir::Location loc, -+ mlir::ConversionPatternRewriter &rewriter, -+ fir::DoLoopOp doLoop, mlir::IRMapping &mapper, -+ mlir::omp::LoopNestClauseOps &loopNestClauseOps) const { -+ assert(loopNestClauseOps.loopLBVar.empty() && ++ void genLoopNestClauseOps( ++ mlir::Location loc, mlir::ConversionPatternRewriter &rewriter, ++ looputils::LoopNestToIndVarMap &loopNest, mlir::IRMapping &mapper, ++ mlir::omp::LoopNestOperands &loopNestClauseOps, ++ mlir::omp::TargetOperands *targetClauseOps = nullptr) const { ++ assert(loopNestClauseOps.loopLowerBounds.empty() && + "Loop nest bounds were already emitted!"); + + // Clones the chain of ops defining a certain loop bound or its step into @@ -7187,53 +7339,60 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/D + // `fir.convert`op, this lambda clones the `fir.convert` as well as the + // value it converts from. We do this since `omp.target` regions are + // isolated from above. 
-+ std::function -+ cloneBoundOrStepDefChain = [&](mlir::Operation *operation) { -+ if (mlir::isa_and_present(operation)) -+ return rewriter.clone(*operation, mapper); -+ -+ if (auto convertOp = -+ mlir::dyn_cast_if_present(operation)) { -+ cloneBoundOrStepDefChain(convertOp.getValue().getDefiningOp()); -+ return rewriter.clone(*operation, mapper); -+ } ++ auto cloneBoundOrStepOpChain = ++ [&](mlir::Operation *operation) -> mlir::Operation * { ++ llvm::SetVector opChain; ++ looputils::collectIndirectConstOpChain(operation, opChain); + -+ std::string opStr; -+ llvm::raw_string_ostream opOs(opStr); -+ opOs << "Unexpected operation: " << *operation; -+ llvm_unreachable(opOs.str().c_str()); -+ }; ++ mlir::Operation *result; ++ for (mlir::Operation *link : opChain) ++ result = rewriter.clone(*link, mapper); + -+ mlir::Operation *lbOp = doLoop.getLowerBound().getDefiningOp(); -+ mlir::Operation *ubOp = doLoop.getUpperBound().getDefiningOp(); -+ mlir::Operation *stepOp = doLoop.getStep().getDefiningOp(); ++ return result; ++ }; ++ ++ auto hostEvalCapture = [&](mlir::Value var, ++ llvm::SmallVectorImpl &bounds) { ++ var = cloneBoundOrStepOpChain(var.getDefiningOp())->getResult(0); ++ bounds.push_back(var); ++ ++ if (targetClauseOps) ++ targetClauseOps->hostEvalVars.push_back(var); ++ }; ++ ++ for (auto &[doLoop, _] : loopNest) { ++ hostEvalCapture(doLoop.getLowerBound(), ++ loopNestClauseOps.loopLowerBounds); ++ hostEvalCapture(doLoop.getUpperBound(), ++ loopNestClauseOps.loopUpperBounds); ++ hostEvalCapture(doLoop.getStep(), loopNestClauseOps.loopSteps); ++ } + -+ loopNestClauseOps.loopLBVar.push_back( -+ cloneBoundOrStepDefChain(lbOp)->getResult(0)); -+ loopNestClauseOps.loopLBVar.push_back( -+ cloneBoundOrStepDefChain(ubOp)->getResult(0)); -+ loopNestClauseOps.loopLBVar.push_back( -+ cloneBoundOrStepDefChain(stepOp)->getResult(0)); -+ loopNestClauseOps.loopInclusiveAttr = rewriter.getUnitAttr(); ++ loopNestClauseOps.loopInclusive = rewriter.getUnitAttr(); + } + + mlir::omp::DistributeOp + genDistributeOp(mlir::Location loc, + mlir::ConversionPatternRewriter &rewriter) const { + auto distOp = rewriter.create( -+ loc, /*clauses=*/mlir::omp::DistributeClauseOps{}); ++ loc, /*clauses=*/mlir::omp::DistributeOperands{}); + + rewriter.createBlock(&distOp.getRegion()); -+ rewriter.setInsertionPoint(rewriter.create(loc)); -+ + return distOp; + } + -+ void genInductionVariableAlloc(mlir::ConversionPatternRewriter &rewriter, -+ llvm::ArrayRef liveIns, -+ mlir::IRMapping &mapper) const { -+ mlir::Operation *indVarMemDef = liveIns.front().getDefiningOp(); ++ void genLoopNestIndVarAllocs(mlir::ConversionPatternRewriter &rewriter, ++ looputils::LoopNestToIndVarMap &loopNest, ++ mlir::IRMapping &mapper) const { ++ ++ for (auto &[_, indVarInfo] : loopNest) ++ genInductionVariableAlloc(rewriter, indVarInfo.iterVarMemDef, mapper); ++ } + ++ mlir::Operation * ++ genInductionVariableAlloc(mlir::ConversionPatternRewriter &rewriter, ++ mlir::Operation *indVarMemDef, ++ mlir::IRMapping &mapper) const { + assert( + indVarMemDef != nullptr && + "Induction variable memdef is expected to have a defining operation."); @@ -7243,38 +7402,34 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/D + indVarDeclareAndAlloc.insert(operand.getDefiningOp()); + indVarDeclareAndAlloc.insert(indVarMemDef); + ++ mlir::Operation *result; + for (mlir::Operation *opToClone : indVarDeclareAndAlloc) -+ rewriter.clone(*opToClone, mapper); ++ result = rewriter.clone(*opToClone, mapper); ++ ++ return result; + } + 
-+ mlir::omp::ParallelOp -+ genParallelOp(mlir::Location loc, mlir::ConversionPatternRewriter &rewriter, -+ fir::DoLoopOp doLoop, llvm::ArrayRef liveIns, -+ mlir::IRMapping &mapper, -+ mlir::omp::LoopNestClauseOps &loopNestClauseOps) const { ++ mlir::omp::ParallelOp genParallelOp(mlir::Location loc, ++ mlir::ConversionPatternRewriter &rewriter, ++ looputils::LoopNestToIndVarMap &loopNest, ++ mlir::IRMapping &mapper) const { + auto parallelOp = rewriter.create(loc); + rewriter.createBlock(¶llelOp.getRegion()); + rewriter.setInsertionPoint(rewriter.create(loc)); + -+ // If mapping to host, the local induction variable and loop bounds need to -+ // be emitted as part of the `omp.parallel` op. -+ if (!mapToDevice) { -+ genInductionVariableAlloc(rewriter, liveIns, mapper); -+ genLoopNestClauseOps(loc, rewriter, doLoop, mapper, loopNestClauseOps); -+ } -+ ++ genLoopNestIndVarAllocs(rewriter, loopNest, mapper); + return parallelOp; + } + + mlir::omp::LoopNestOp + genWsLoopOp(mlir::ConversionPatternRewriter &rewriter, fir::DoLoopOp doLoop, + mlir::IRMapping &mapper, -+ const mlir::omp::LoopNestClauseOps &clauseOps) const { ++ const mlir::omp::LoopNestOperands &clauseOps, ++ bool isComposite) const { + + auto wsloopOp = rewriter.create(doLoop.getLoc()); ++ wsloopOp.setComposite(isComposite); + rewriter.createBlock(&wsloopOp.getRegion()); -+ rewriter.setInsertionPoint( -+ rewriter.create(wsloopOp.getLoc())); + + auto loopNestOp = + rewriter.create(doLoop.getLoc(), clauseOps); @@ -7293,19 +7448,17 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/D + } + + bool mapToDevice; ++ llvm::DenseSet &concurrentLoopsToSkip; +}; + +class DoConcurrentConversionPass -+ : public fir::impl::DoConcurrentConversionPassBase< ++ : public flangomp::impl::DoConcurrentConversionPassBase< + DoConcurrentConversionPass> { +public: -+ using fir::impl::DoConcurrentConversionPassBase< -+ DoConcurrentConversionPass>::DoConcurrentConversionPassBase; -+ + DoConcurrentConversionPass() = default; + + DoConcurrentConversionPass( -+ const fir::DoConcurrentConversionPassOptions &options) ++ const flangomp::DoConcurrentConversionPassOptions &options) + : DoConcurrentConversionPassBase(options) {} + + void runOnOperation() override { @@ -7317,24 +7470,28 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/D + + auto *context = &getContext(); + -+ if (mapTo != fir::omp::DoConcurrentMappingKind::DCMK_Host && -+ mapTo != fir::omp::DoConcurrentMappingKind::DCMK_Device) { ++ if (mapTo != flangomp::DoConcurrentMappingKind::DCMK_Host && ++ mapTo != flangomp::DoConcurrentMappingKind::DCMK_Device) { + mlir::emitWarning(mlir::UnknownLoc::get(context), + "DoConcurrentConversionPass: invalid `map-to` value. 
" + "Valid values are: `host` or `device`"); + return; + } -+ ++ llvm::DenseSet concurrentLoopsToSkip; + mlir::RewritePatternSet patterns(context); + patterns.insert( -+ context, mapTo == fir::omp::DoConcurrentMappingKind::DCMK_Device); ++ context, mapTo == flangomp::DoConcurrentMappingKind::DCMK_Device, ++ concurrentLoopsToSkip); + mlir::ConversionTarget target(*context); -+ target.addLegalDialect(); -+ -+ target.addDynamicallyLegalOp( -+ [](fir::DoLoopOp op) { return !op.getUnordered(); }); ++ target ++ .addLegalDialect(); ++ ++ target.addDynamicallyLegalOp([&](fir::DoLoopOp op) { ++ return !op.getUnordered() || concurrentLoopsToSkip.contains(op); ++ }); + + if (mlir::failed(mlir::applyFullConversion(getOperation(), target, + std::move(patterns)))) { @@ -7347,98 +7504,1225 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/D +} // namespace + +std::unique_ptr -+fir::createDoConcurrentConversionPass(bool mapToDevice) { ++flangomp::createDoConcurrentConversionPass(bool mapToDevice) { + DoConcurrentConversionPassOptions options; -+ options.mapTo = mapToDevice ? fir::omp::DoConcurrentMappingKind::DCMK_Device -+ : fir::omp::DoConcurrentMappingKind::DCMK_Host; ++ options.mapTo = mapToDevice ? flangomp::DoConcurrentMappingKind::DCMK_Device ++ : flangomp::DoConcurrentMappingKind::DCMK_Host; + + return std::make_unique(options); +} -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/StackArrays.cpp llvm-project/flang/lib/Optimizer/Transforms/StackArrays.cpp ---- llvm-project.orig/flang/lib/Optimizer/Transforms/StackArrays.cpp 2024-06-12 10:43:12.632210369 -0500 -+++ llvm-project/flang/lib/Optimizer/Transforms/StackArrays.cpp 2024-06-12 10:44:09.351614239 -0500 -@@ -571,8 +571,31 @@ - return {point}; - }; - -- auto oldOmpRegion = -- oldAlloc->getParentOfType(); -+ // Find the first OpenMP outlineable parent region while taking into account -+ // the possibility of finding an omp.parallel region that is taking a loop -+ // wrapper role. These operations must be skipped, as they cannot hold -+ // allocations. -+ const auto findOmpRegion = [](mlir::Operation *op) { -+ auto findOmpRegionImpl = -+ [](mlir::Operation *op, -+ auto &findOmpRegion) -> mlir::omp::OutlineableOpenMPOpInterface { -+ auto ompRegion = -+ op->getParentOfType(); -+ if (!ompRegion) -+ return nullptr; -+ -+ if (auto parallelOp = -+ mlir::dyn_cast_if_present(*ompRegion)) { -+ mlir::Operation *parentOp = parallelOp->getParentOp(); -+ if (mlir::isa_and_present(parentOp)) -+ return findOmpRegion(parentOp, findOmpRegion); +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Optimizer/OpenMP/GlobalFiltering.cpp llvm-project-aso/flang/lib/Optimizer/OpenMP/GlobalFiltering.cpp +--- llvm-project-aso-orig/flang/lib/Optimizer/OpenMP/GlobalFiltering.cpp 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/lib/Optimizer/OpenMP/GlobalFiltering.cpp 2024-11-23 20:39:47.180175366 -0600 +@@ -0,0 +1,70 @@ ++//===- GlobalFiltering.cpp ------------------------------------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file implements transforms to filter out functions intended for the host ++// when compiling for the device and vice versa. 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "flang/Optimizer/Dialect/FIRDialect.h" ++#include "flang/Optimizer/Dialect/FIROpsSupport.h" ++#include "flang/Optimizer/OpenMP/Passes.h" ++ ++#include "mlir/Dialect/Func/IR/FuncOps.h" ++#include "mlir/Dialect/OpenMP/OpenMPDialect.h" ++#include "mlir/Dialect/OpenMP/OpenMPInterfaces.h" ++#include "mlir/IR/BuiltinOps.h" ++#include "llvm/ADT/SmallVector.h" ++ ++namespace flangomp { ++#define GEN_PASS_DEF_GLOBALFILTERINGPASS ++#include "flang/Optimizer/OpenMP/Passes.h.inc" ++} // namespace flangomp ++ ++using namespace mlir; ++ ++namespace { ++// TODO Remove this pass when AOMP moves to `clang-linker-wrapper` (instead of ++// `clang-offload-packager`). ++class GlobalFilteringPass ++ : public flangomp::impl::GlobalFilteringPassBase { ++public: ++ GlobalFilteringPass() = default; ++ ++ void runOnOperation() override { ++ auto op = dyn_cast(getOperation()); ++ if (!op || !op.getIsTargetDevice()) ++ return; ++ ++ op->walk([&](fir::GlobalOp globalOp) { ++ bool symbolUnused = true; ++ SymbolTable::UseRange globalUses = *globalOp.getSymbolUses(op); ++ for (SymbolTable::SymbolUse use : globalUses) { ++ if (use.getUser() == globalOp) ++ continue; ++ symbolUnused = false; ++ break; + } -+ return ompRegion; -+ }; -+ return findOmpRegionImpl(op, findOmpRegionImpl); -+ }; + -+ auto oldOmpRegion = findOmpRegion(oldAlloc); - - // Find when the last operand value becomes available - mlir::Block *operandsBlock = nullptr; -@@ -600,8 +623,7 @@ - LLVM_DEBUG(llvm::dbgs() - << "--Placing after last operand: " << *lastOperand << "\n"); - // check we aren't moving out of an omp region -- auto lastOpOmpRegion = -- lastOperand->getParentOfType(); -+ auto lastOpOmpRegion = findOmpRegion(lastOperand); - if (lastOpOmpRegion == oldOmpRegion) - return checkReturn(lastOperand); - // Presumably this happened because the operands became ready before the -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir llvm-project/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir ---- llvm-project.orig/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir 2024-06-12 10:43:12.656210116 -0500 -+++ llvm-project/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir 2024-06-12 10:44:09.351614239 -0500 -@@ -199,6 +199,7 @@ - fir.store %3 to %6 : !fir.ref - omp.yield ++ // Look for declare target information in case this global is intended to ++ // always exist on the device. ++ auto declareTargetIface = ++ llvm::dyn_cast( ++ globalOp.getOperation()); ++ bool hostOnlySymbol = !declareTargetIface || ++ !declareTargetIface.isDeclareTarget() || ++ declareTargetIface.getDeclareTargetDeviceType() == ++ omp::DeclareTargetDeviceType::host; ++ ++ // Remove unused host symbols with external linkage. 
++ if (symbolUnused && !globalOp.getLinkName() && hostOnlySymbol) ++ globalOp.erase(); ++ }); ++ } ++}; ++} // namespace +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp llvm-project-aso/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp +--- llvm-project-aso-orig/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp 2024-11-23 20:25:26.843275164 -0600 ++++ llvm-project-aso/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp 2024-11-23 20:39:47.180175366 -0600 +@@ -62,7 +62,7 @@ + /// Tracks any intermediate function/subroutine local allocations we + /// generate for the descriptors of box type dummy arguments, so that + /// we can retrieve it for subsequent reuses within the functions +- /// scope ++ /// scope. + std::map + localBoxAllocas; +@@ -370,24 +370,23 @@ + if (!mapClauseOwner) + return; + +- auto addOperands = [&](mlir::MutableOperandRange &mutableOpRange, ++ auto addOperands = [&](mlir::MutableOperandRange &mapVarsArr, + mlir::Operation *directiveOp, + unsigned blockArgInsertIndex = 0) { +- if (!llvm::is_contained(mutableOpRange.getAsOperandRange(), +- op.getResult())) ++ if (!llvm::is_contained(mapVarsArr.getAsOperandRange(), op.getResult())) + return; + + // There doesn't appear to be a simple way to convert MutableOperandRange + // to a vector currently, so we instead use a for_each to populate our + // vector. + llvm::SmallVector newMapOps; +- newMapOps.reserve(mutableOpRange.size()); ++ newMapOps.reserve(mapVarsArr.size()); + llvm::for_each( +- mutableOpRange.getAsOperandRange(), ++ mapVarsArr.getAsOperandRange(), + [&newMapOps](mlir::Value oper) { newMapOps.push_back(oper); }); + + for (auto mapMember : op.getMembers()) { +- if (llvm::is_contained(mutableOpRange.getAsOperandRange(), mapMember)) ++ if (llvm::is_contained(mapVarsArr.getAsOperandRange(), mapMember)) + continue; + newMapOps.push_back(mapMember); + if (directiveOp) { +@@ -397,7 +396,7 @@ + } } -+ omp.terminator + +- mutableOpRange.assign(newMapOps); ++ mapVarsArr.assign(newMapOps); + }; + + auto argIface = +@@ -405,14 +404,13 @@ + + if (auto mapClauseOwner = + llvm::dyn_cast(target)) { +- mlir::MutableOperandRange mapMutableOpRange = +- mapClauseOwner.getMapVarsMutable(); ++ mlir::MutableOperandRange mapVarsArr = mapClauseOwner.getMapVarsMutable(); + unsigned blockArgInsertIndex = + argIface + ? argIface.getMapBlockArgsStart() + argIface.numMapBlockArgs() + : 0; + addOperands( +- mapMutableOpRange, ++ mapVarsArr, + llvm::dyn_cast_or_null(argIface.getOperation()), + blockArgInsertIndex); } - omp.terminator +@@ -466,10 +464,7 @@ + // operation (usually function) containing the MapInfoOp because this pass + // will mutate siblings of MapInfoOp. + void runOnOperation() override { +- mlir::ModuleOp module = +- mlir::dyn_cast_or_null(getOperation()); +- if (!module) +- module = getOperation()->getParentOfType(); ++ mlir::ModuleOp module = mlir::cast(getOperation()); + fir::KindMapping kindMap = fir::getKindMapping(module); + fir::FirOpBuilder builder{module, std::move(kindMap)}; + +@@ -481,7 +476,7 @@ + // ourselves to the possibility of race conditions while this pass + // undergoes frequent re-iteration for the near future. So we loop + // over function in the module and then map.info inside of those. +- getOperation()->walk([&](mlir::func::FuncOp func) { ++ module->walk([&](mlir::func::FuncOp func) { + // clear all local allocations we made for any boxes in any prior + // iterations from previous function scopes. 
+ localBoxAllocas.clear(); +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Optimizer/Passes/Pipelines.cpp llvm-project-aso/flang/lib/Optimizer/Passes/Pipelines.cpp +--- llvm-project-aso-orig/flang/lib/Optimizer/Passes/Pipelines.cpp 2024-11-23 20:25:26.843275164 -0600 ++++ llvm-project-aso/flang/lib/Optimizer/Passes/Pipelines.cpp 2024-11-23 20:39:47.180175366 -0600 +@@ -243,12 +243,21 @@ + /// \param pm - MLIR pass manager that will hold the pipeline definition. + /// \param isTargetDevice - Whether code is being generated for a target device + /// rather than the host device. +-void createOpenMPFIRPassPipeline(mlir::PassManager &pm, bool isTargetDevice) { ++void createOpenMPFIRPassPipeline(mlir::PassManager &pm, ++ OpenMPFIRPassPipelineOpts opts) { ++ if (opts.doConcurrentMappingKind != DoConcurrentMappingKind::DCMK_None) ++ pm.addPass(flangomp::createDoConcurrentConversionPass( ++ opts.doConcurrentMappingKind == DoConcurrentMappingKind::DCMK_Device)); ++ + pm.addPass(flangomp::createMapInfoFinalizationPass()); + pm.addPass(flangomp::createMapsForPrivatizedSymbolsPass()); + pm.addPass(flangomp::createMarkDeclareTargetPass()); +- if (isTargetDevice) ++ if (opts.isTargetDevice) { + pm.addPass(flangomp::createFunctionFilteringPass()); ++ ++ if (opts.enableOffloadGlobalFiltering) ++ pm.addPass(flangomp::createGlobalFilteringPass()); ++ } + } + + void createDebugPasses(mlir::PassManager &pm, +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/array-constructor.cpp llvm-project-aso/flang/runtime/array-constructor.cpp +--- llvm-project-aso-orig/flang/runtime/array-constructor.cpp 2024-08-27 20:36:25.236173040 -0500 ++++ llvm-project-aso/flang/runtime/array-constructor.cpp 2024-11-23 20:39:47.180175366 -0600 +@@ -176,7 +176,7 @@ + AllocateOrReallocateVectorIfNeeded(vector, terminator, to.Elements(), 1); + SubscriptValue subscript[1]{ + to.GetDimension(0).LowerBound() + vector.nextValuePosition}; +- std::memcpy(to.Element(subscript), from, to.ElementBytes()); ++ Fortran::runtime::memcpy(to.Element(subscript), from, to.ElementBytes()); + ++vector.nextValuePosition; + } + +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/assign.cpp llvm-project-aso/flang/runtime/assign.cpp +--- llvm-project-aso-orig/flang/runtime/assign.cpp 2024-11-23 20:25:26.851275134 -0600 ++++ llvm-project-aso/flang/runtime/assign.cpp 2024-11-23 20:39:47.180175366 -0600 +@@ -263,7 +263,7 @@ + if (MayAlias(to, from)) { + if (mustDeallocateLHS) { + deferDeallocation = &deferredDeallocStatDesc.descriptor(); +- std::memcpy(deferDeallocation, &to, to.SizeInBytes()); ++ Fortran::runtime::memcpy(deferDeallocation, &to, to.SizeInBytes()); + to.set_base_addr(nullptr); + } else if (!isSimpleMemmove()) { + // Handle LHS/RHS aliasing by copying RHS into a temp, then +@@ -271,7 +271,7 @@ + auto descBytes{from.SizeInBytes()}; + StaticDescriptor staticDesc; + Descriptor &newFrom{staticDesc.descriptor()}; +- std::memcpy(&newFrom, &from, descBytes); ++ Fortran::runtime::memcpy(&newFrom, &from, descBytes); + // Pretend the temporary descriptor is for an ALLOCATABLE + // entity, otherwise, the Deallocate() below will not + // free the descriptor memory. 
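// --- Illustrative sketch, not part of the patch -----------------------------
// Shows how a driver might populate the OpenMPFIRPassPipelineOpts introduced in
// the Pipelines.cpp hunk above and run the resulting pipeline. Only the option
// fields (isTargetDevice, enableOffloadGlobalFiltering, doConcurrentMappingKind),
// the DCMK_* enumerators, and createOpenMPFIRPassPipeline() are taken from the
// diff; the header path, namespace spelling, and buildOffloadPipeline() itself
// are assumptions made for illustration only.
#include "flang/Optimizer/Passes/Pipelines.h" // assumed location of the options
#include "mlir/IR/BuiltinOps.h"
#include "mlir/Pass/PassManager.h"

static mlir::LogicalResult
buildOffloadPipeline(mlir::ModuleOp module, bool isTargetDevice,
                     bool enableGlobalFiltering, bool mapDoConcurrentToDevice) {
  mlir::PassManager pm(module->getContext());

  OpenMPFIRPassPipelineOpts opts{};
  // Device compilation gates FunctionFiltering and the new GlobalFiltering pass.
  opts.isTargetDevice = isTargetDevice;
  // Corresponds to the driver's offload-global-filtering toggle.
  opts.enableOffloadGlobalFiltering = enableGlobalFiltering;
  // DCMK_None leaves `do concurrent` loops alone; otherwise the
  // DoConcurrentConversion pass maps them to host or device OpenMP.
  opts.doConcurrentMappingKind = mapDoConcurrentToDevice
                                     ? DoConcurrentMappingKind::DCMK_Device
                                     : DoConcurrentMappingKind::DCMK_Host;

  // Schedules DoConcurrentConversion (when enabled), MapInfoFinalization,
  // MapsForPrivatizedSymbols, MarkDeclareTarget and, for device compilation,
  // FunctionFiltering plus GlobalFiltering, exactly as wired up above.
  createOpenMPFIRPassPipeline(pm, opts);
  return pm.run(module);
}
// -----------------------------------------------------------------------------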
+diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/buffer.h llvm-project-aso/flang/runtime/buffer.h +--- llvm-project-aso-orig/flang/runtime/buffer.h 2024-08-27 20:36:25.240173000 -0500 ++++ llvm-project-aso/flang/runtime/buffer.h 2024-11-23 20:39:47.180175366 -0600 +@@ -158,8 +158,8 @@ + // Avoid passing a null pointer, since it would result in an undefined + // behavior. + if (old != nullptr) { +- std::memcpy(buffer_, old + start_, chunk); +- std::memcpy(buffer_ + chunk, old, length_ - chunk); ++ Fortran::runtime::memcpy(buffer_, old + start_, chunk); ++ Fortran::runtime::memcpy(buffer_ + chunk, old, length_ - chunk); + FreeMemory(old); + } + start_ = 0; +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/character.cpp llvm-project-aso/flang/runtime/character.cpp +--- llvm-project-aso-orig/flang/runtime/character.cpp 2024-08-27 20:36:25.240173000 -0500 ++++ llvm-project-aso/flang/runtime/character.cpp 2024-11-23 20:39:47.180175366 -0600 +@@ -596,8 +596,8 @@ + from.GetLowerBounds(fromAt); + for (; elements-- > 0; + to += newBytes, p += oldBytes, from.IncrementSubscripts(fromAt)) { +- std::memcpy(to, p, oldBytes); +- std::memcpy(to + oldBytes, from.Element(fromAt), fromBytes); ++ Fortran::runtime::memcpy(to, p, oldBytes); ++ Fortran::runtime::memcpy(to + oldBytes, from.Element(fromAt), fromBytes); } -@@ -223,6 +224,7 @@ - // CHECK: llvm.store %[[I1]], %[[ARR_I_REF]] : i32, !llvm.ptr - // CHECK: omp.yield - // CHECK: } -+// CHECK: omp.terminator - // CHECK: } - // CHECK: omp.terminator - // CHECK: } -@@ -516,6 +518,7 @@ - fir.store %7 to %3 : !fir.ref - omp.yield + FreeMemory(old); + } +@@ -611,7 +611,7 @@ + std::size_t oldLen{accumulator.ElementBytes()}; + accumulator.raw().elem_len += chars; + RUNTIME_CHECK(terminator, accumulator.Allocate() == CFI_SUCCESS); +- std::memcpy(accumulator.OffsetElement(oldLen), from, chars); ++ Fortran::runtime::memcpy(accumulator.OffsetElement(oldLen), from, chars); + FreeMemory(old); + } + +@@ -677,7 +677,7 @@ + std::size_t RTDEF(CharacterAppend1)(char *lhs, std::size_t lhsBytes, + std::size_t offset, const char *rhs, std::size_t rhsBytes) { + if (auto n{std::min(lhsBytes - offset, rhsBytes)}) { +- std::memcpy(lhs + offset, rhs, n); ++ Fortran::runtime::memcpy(lhs + offset, rhs, n); + offset += n; + } + return offset; +@@ -685,7 +685,7 @@ + + void RTDEF(CharacterPad1)(char *lhs, std::size_t bytes, std::size_t offset) { + if (bytes > offset) { +- std::memset(lhs + offset, ' ', bytes - offset); ++ Fortran::runtime::memset(lhs + offset, ' ', bytes - offset); + } + } + +@@ -817,7 +817,7 @@ + } + const char *from{string.OffsetElement()}; + for (char *to{result.OffsetElement()}; ncopies-- > 0; to += origBytes) { +- std::memcpy(to, from, origBytes); ++ Fortran::runtime::memcpy(to, from, origBytes); + } + } + +@@ -847,7 +847,7 @@ + result.Establish(string.type(), resultBytes, nullptr, 0, nullptr, + CFI_attribute_allocatable); + RUNTIME_CHECK(terminator, result.Allocate() == CFI_SUCCESS); +- std::memcpy(result.OffsetElement(), string.OffsetElement(), resultBytes); ++ Fortran::runtime::memcpy(result.OffsetElement(), string.OffsetElement(), resultBytes); + } + + std::size_t RTDEF(Verify1)(const char *x, std::size_t xLen, const char *set, +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/command.cpp llvm-project-aso/flang/runtime/command.cpp +--- llvm-project-aso-orig/flang/runtime/command.cpp 2024-08-27 20:36:25.240173000 -0500 ++++ llvm-project-aso/flang/runtime/command.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -54,7 
+54,7 @@ + + static void FillWithSpaces(const Descriptor &value, std::size_t offset = 0) { + if (offset < value.ElementBytes()) { +- std::memset( ++ Fortran::runtime::memset( + value.OffsetElement(offset), ' ', value.ElementBytes() - offset); + } + } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/copy.cpp llvm-project-aso/flang/runtime/copy.cpp +--- llvm-project-aso-orig/flang/runtime/copy.cpp 2024-08-27 20:36:25.240173000 -0500 ++++ llvm-project-aso/flang/runtime/copy.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -9,6 +9,7 @@ + #include "copy.h" + #include "stack.h" + #include "terminator.h" ++#include "tools.h" + #include "type-info.h" + #include "flang/Runtime/allocatable.h" + #include "flang/Runtime/descriptor.h" +@@ -101,7 +102,7 @@ + char *toPtr{to.Element(toAt)}; + char *fromPtr{from.Element(fromAt)}; + RUNTIME_CHECK(terminator, to.ElementBytes() == from.ElementBytes()); +- std::memcpy(toPtr, fromPtr, to.ElementBytes()); ++ Fortran::runtime::memcpy(toPtr, fromPtr, to.ElementBytes()); + return; + } + +@@ -148,7 +149,7 @@ + // Moreover, if we came here from an Component::Genre::Data component, + // all the per-element copies are redundant, because the parent + // has already been copied as a whole. +- std::memcpy(toPtr, fromPtr, curTo.ElementBytes()); ++ Fortran::runtime::memcpy(toPtr, fromPtr, curTo.ElementBytes()); + --elements; + if (elements != 0) { + currentCopy.IncrementSubscripts(terminator); +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/derived.cpp llvm-project-aso/flang/runtime/derived.cpp +--- llvm-project-aso-orig/flang/runtime/derived.cpp 2024-08-27 20:36:25.240173000 -0500 ++++ llvm-project-aso/flang/runtime/derived.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -73,7 +73,7 @@ + std::size_t bytes{comp.SizeInBytes(instance)}; + for (std::size_t j{0}; j++ < elements; instance.IncrementSubscripts(at)) { + char *ptr{instance.ElementComponent(at, comp.offset())}; +- std::memcpy(ptr, init, bytes); ++ Fortran::runtime::memcpy(ptr, init, bytes); + } + } else if (comp.genre() == typeInfo::Component::Genre::Pointer) { + // Data pointers without explicit initialization are established +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/descriptor.cpp llvm-project-aso/flang/runtime/descriptor.cpp +--- llvm-project-aso-orig/flang/runtime/descriptor.cpp 2024-08-27 20:36:25.240173000 -0500 ++++ llvm-project-aso/flang/runtime/descriptor.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -26,7 +26,7 @@ + RT_API_ATTRS Descriptor::Descriptor(const Descriptor &that) { *this = that; } + + RT_API_ATTRS Descriptor &Descriptor::operator=(const Descriptor &that) { +- std::memcpy(this, &that, that.SizeInBytes()); ++ Fortran::runtime::memcpy(this, &that, that.SizeInBytes()); + return *this; + } + +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/descriptor-io.cpp llvm-project-aso/flang/runtime/descriptor-io.cpp +--- llvm-project-aso-orig/flang/runtime/descriptor-io.cpp 2024-08-27 20:36:25.240173000 -0500 ++++ llvm-project-aso/flang/runtime/descriptor-io.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -32,7 +32,7 @@ + if (edit.descriptor == DataEdit::DefinedDerivedType) { + ioType[0] = 'D'; + ioType[1] = 'T'; +- std::memcpy(ioType + 2, edit.ioType, edit.ioTypeChars); ++ Fortran::runtime::memcpy(ioType + 2, edit.ioType, edit.ioTypeChars); + } else { + runtime::strcpy( + ioType, io.mutableModes().inNamelist ? 
"NAMELIST" : "LISTDIRECTED"); +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/edit-input.cpp llvm-project-aso/flang/runtime/edit-input.cpp +--- llvm-project-aso-orig/flang/runtime/edit-input.cpp 2024-09-13 09:46:38.870303386 -0500 ++++ llvm-project-aso/flang/runtime/edit-input.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -121,7 +121,7 @@ + io.HandleAbsolutePosition(start); + remaining.reset(); + // Make a second pass now that the digit count is known +- std::memset(n, 0, bytes); ++ Fortran::runtime::memset(n, 0, bytes); + int increment{isHostLittleEndian ? -1 : 1}; + auto *data{reinterpret_cast(n) + + (isHostLittleEndian ? significantBytes - 1 : bytes - significantBytes)}; +@@ -280,9 +280,9 @@ + // For kind==8 (i.e. shft==0), the value is stored in low_ in big endian. + if (!isHostLittleEndian && shft >= 0) { + auto l{value.low() << (8 * shft)}; +- std::memcpy(n, &l, kind); ++ Fortran::runtime::memcpy(n, &l, kind); + } else { +- std::memcpy(n, &value, kind); // a blank field means zero ++ Fortran::runtime::memcpy(n, &value, kind); // a blank field means zero } -+ omp.terminator + return true; + } else { +@@ -1095,7 +1095,7 @@ + --skipChars; + } else { + char32_t buffer{0}; +- std::memcpy(&buffer, input, chunkBytes); ++ Fortran::runtime::memcpy(&buffer, input, chunkBytes); + if ((sizeof *x == 1 && buffer > 0xff) || + (sizeof *x == 2 && buffer > 0xffff)) { + *x++ = '?'; +@@ -1122,7 +1122,7 @@ + chunkBytes = std::min(remainingChars, readyBytes); + chunkBytes = std::min(lengthChars, chunkBytes); + chunkChars = chunkBytes; +- std::memcpy(x, input, chunkBytes); ++ Fortran::runtime::memcpy(x, input, chunkBytes); + x += chunkBytes; + lengthChars -= chunkChars; + } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/extensions.cpp llvm-project-aso/flang/runtime/extensions.cpp +--- llvm-project-aso-orig/flang/runtime/extensions.cpp 2024-10-18 17:40:32.520992126 -0500 ++++ llvm-project-aso/flang/runtime/extensions.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -78,7 +78,7 @@ + + void GetUsernameEnvVar(const char *envName, char *arg, std::int64_t length) { + Descriptor name{*Descriptor::Create( +- 1, std::strlen(envName) + 1, const_cast(envName), 0)}; ++ 1, Fortran::runtime::strlen(envName) + 1, const_cast(envName), 0)}; + Descriptor value{*Descriptor::Create(1, length, arg, 0)}; + + RTNAME(GetEnvVariable) +@@ -102,7 +102,7 @@ + char str[26]; + // Insufficient space, fill with spaces and return. 
+ if (length < 24) { +- std::memset(arg, ' ', length); ++ Fortran::runtime::memset(arg, ' ', length); + return; + } + +@@ -134,8 +134,8 @@ + void FORTRAN_PROCEDURE_NAME(getlog)(char *arg, std::int64_t length) { + #if _REENTRANT || _POSIX_C_SOURCE >= 199506L + if (length >= 1 && getlogin_r(arg, length) == 0) { +- auto loginLen{std::strlen(arg)}; +- std::memset( ++ auto loginLen{Fortran::runtime::strlen(arg)}; ++ Fortran::runtime::memset( + arg + loginLen, ' ', static_cast(length) - loginLen); + return; + } +@@ -189,7 +189,7 @@ + char *newName{nullptr}; + if (name[nameLength - 1] != '\0') { + newName = static_cast(std::malloc(nameLength + 1)); +- std::memcpy(newName, name, nameLength); ++ Fortran::runtime::memcpy(newName, name, nameLength); + newName[nameLength] = '\0'; + name = newName; + } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/external-unit.cpp llvm-project-aso/flang/runtime/external-unit.cpp +--- llvm-project-aso-orig/flang/runtime/external-unit.cpp 2024-08-27 20:36:25.240173000 -0500 ++++ llvm-project-aso/flang/runtime/external-unit.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -122,7 +122,7 @@ + bool impliedClose{false}; + if (IsConnected()) { + bool isSamePath{newPath.get() && path() && pathLength() == newPathLength && +- std::memcmp(path(), newPath.get(), newPathLength) == 0}; ++ Fortran::runtime::memcmp(path(), newPath.get(), newPathLength) == 0}; + if (status && *status != OpenStatus::Old && isSamePath) { + handler.SignalError("OPEN statement for connected unit may not have " + "explicit STATUS= other than 'OLD'"); +@@ -202,7 +202,7 @@ + std::size_t pathMaxLen{32}; + auto path{SizedNew{handler}(pathMaxLen)}; + std::snprintf(path.get(), pathMaxLen, "fort.%d", unitNumber_); +- OpenUnit(status, action, position, std::move(path), std::strlen(path.get()), ++ OpenUnit(status, action, position, std::move(path), Fortran::runtime::strlen(path.get()), + convert, handler); + return IsConnected(); + } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/extrema.cpp llvm-project-aso/flang/runtime/extrema.cpp +--- llvm-project-aso-orig/flang/runtime/extrema.cpp 2024-09-24 18:07:09.519920643 -0500 ++++ llvm-project-aso/flang/runtime/extrema.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -374,7 +374,7 @@ + CreatePartialReductionResult(result, x, + Descriptor::BytesFor(TypeCategory::Integer, kind), dim, terminator, + intrinsic, TypeCode{TypeCategory::Integer, kind}); +- std::memset( ++ Fortran::runtime::memset( + result.OffsetElement(), 0, result.Elements() * result.ElementBytes()); + return; + } +@@ -518,11 +518,11 @@ + static_assert(std::is_same_v); + std::size_t byteSize{array_.ElementBytes()}; + if (extremum_) { +- std::memcpy(p, extremum_, byteSize); ++ Fortran::runtime::memcpy(p, extremum_, byteSize); + } else { + // Empty array; fill with character 0 for MAXVAL. + // For MINVAL, set all of the bits. +- std::memset(p, IS_MAXVAL ? 0 : 255, byteSize); ++ Fortran::runtime::memset(p, IS_MAXVAL ? 
0 : 255, byteSize); + } + } + RT_API_ATTRS bool Accumulate(const Type *x) { +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/format-implementation.h llvm-project-aso/flang/runtime/format-implementation.h +--- llvm-project-aso-orig/flang/runtime/format-implementation.h 2024-09-13 09:46:38.870303386 -0500 ++++ llvm-project-aso/flang/runtime/format-implementation.h 2024-11-23 20:39:47.184175353 -0600 +@@ -49,7 +49,7 @@ + SubscriptValue at[maxRank]; + formatDescriptor->GetLowerBounds(at); + for (std::size_t j{0}; j < elements; ++j) { +- std::memcpy(p, formatDescriptor->Element(at), elementBytes); ++ Fortran::runtime::memcpy(p, formatDescriptor->Element(at), elementBytes); + p += elementBytes; + formatDescriptor->IncrementSubscripts(at); + } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/internal-unit.cpp llvm-project-aso/flang/runtime/internal-unit.cpp +--- llvm-project-aso-orig/flang/runtime/internal-unit.cpp 2024-08-27 20:36:25.240173000 -0500 ++++ llvm-project-aso/flang/runtime/internal-unit.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -70,7 +70,7 @@ + BlankFill(record + furthestPositionInRecord, + positionInRecord - furthestPositionInRecord); + } +- std::memcpy(record + positionInRecord, data, bytes); ++ Fortran::runtime::memcpy(record + positionInRecord, data, bytes); + positionInRecord += bytes; + furthestPositionInRecord = furthestAfter; + return ok; +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/io-error.cpp llvm-project-aso/flang/runtime/io-error.cpp +--- llvm-project-aso-orig/flang/runtime/io-error.cpp 2024-08-27 20:36:25.240173000 -0500 ++++ llvm-project-aso/flang/runtime/io-error.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -151,7 +151,7 @@ + } else if (ok) { + std::size_t copied{Fortran::runtime::strlen(buffer)}; + if (copied < bufferLength) { +- std::memset(buffer + copied, ' ', bufferLength - copied); ++ Fortran::runtime::memset(buffer + copied, ' ', bufferLength - copied); + } + return true; + } else { +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/matmul.cpp llvm-project-aso/flang/runtime/matmul.cpp +--- llvm-project-aso-orig/flang/runtime/matmul.cpp 2024-09-24 18:07:09.519920643 -0500 ++++ llvm-project-aso/flang/runtime/matmul.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -81,7 +81,7 @@ + SubscriptValue n, std::size_t xColumnByteStride = 0, + std::size_t yColumnByteStride = 0) { + using ResultType = CppTypeFor; +- std::memset(product, 0, rows * cols * sizeof *product); ++ Fortran::runtime::memset(product, 0, rows * cols * sizeof *product); + const XT *RESTRICT xp0{x}; + for (SubscriptValue k{0}; k < n; ++k) { + ResultType *RESTRICT p{product}; +@@ -153,7 +153,7 @@ + SubscriptValue n, const XT *RESTRICT x, const YT *RESTRICT y, + std::size_t xColumnByteStride = 0) { + using ResultType = CppTypeFor; +- std::memset(product, 0, rows * sizeof *product); ++ Fortran::runtime::memset(product, 0, rows * sizeof *product); + [[maybe_unused]] const XT *RESTRICT xp0{x}; + for (SubscriptValue k{0}; k < n; ++k) { + ResultType *RESTRICT p{product}; +@@ -203,7 +203,7 @@ + SubscriptValue cols, const XT *RESTRICT x, const YT *RESTRICT y, + std::size_t yColumnByteStride = 0) { + using ResultType = CppTypeFor; +- std::memset(product, 0, cols * sizeof *product); ++ Fortran::runtime::memset(product, 0, cols * sizeof *product); + for (SubscriptValue k{0}; k < n; ++k) { + ResultType *RESTRICT p{product}; + auto xv{static_cast(*x++)}; +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/matmul-transpose.cpp 
llvm-project-aso/flang/runtime/matmul-transpose.cpp +--- llvm-project-aso-orig/flang/runtime/matmul-transpose.cpp 2024-09-24 18:07:09.519920643 -0500 ++++ llvm-project-aso/flang/runtime/matmul-transpose.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -62,7 +62,7 @@ + std::size_t yColumnByteStride = 0) { + using ResultType = CppTypeFor; + +- std::memset(product, 0, rows * cols * sizeof *product); ++ Fortran::runtime::memset(product, 0, rows * cols * sizeof *product); + for (SubscriptValue j{0}; j < cols; ++j) { + for (SubscriptValue i{0}; i < rows; ++i) { + for (SubscriptValue k{0}; k < n; ++k) { +@@ -132,7 +132,7 @@ + SubscriptValue n, const XT *RESTRICT x, const YT *RESTRICT y, + std::size_t xColumnByteStride = 0) { + using ResultType = CppTypeFor; +- std::memset(product, 0, rows * sizeof *product); ++ Fortran::runtime::memset(product, 0, rows * sizeof *product); + for (SubscriptValue i{0}; i < rows; ++i) { + for (SubscriptValue k{0}; k < n; ++k) { + ResultType x_ki; +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/misc-intrinsic.cpp llvm-project-aso/flang/runtime/misc-intrinsic.cpp +--- llvm-project-aso-orig/flang/runtime/misc-intrinsic.cpp 2024-08-27 20:36:25.240173000 -0500 ++++ llvm-project-aso/flang/runtime/misc-intrinsic.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -42,14 +42,14 @@ + source.GetLowerBounds(sourceAt); + while (resultBytes > 0 && sourceElements > 0) { + std::size_t toMove{std::min(resultBytes, sourceElementBytes)}; +- std::memcpy(to, source.Element(sourceAt), toMove); ++ Fortran::runtime::memcpy(to, source.Element(sourceAt), toMove); + to += toMove; + resultBytes -= toMove; + --sourceElements; + source.IncrementSubscripts(sourceAt); + } + if (resultBytes > 0) { +- std::memset(to, 0, resultBytes); ++ Fortran::runtime::memset(to, 0, resultBytes); + } + } + +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/pseudo-unit.cpp llvm-project-aso/flang/runtime/pseudo-unit.cpp +--- llvm-project-aso-orig/flang/runtime/pseudo-unit.cpp 2024-08-27 20:36:25.244172960 -0500 ++++ llvm-project-aso/flang/runtime/pseudo-unit.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -132,7 +132,7 @@ + // TODO: use persistent string buffer that can be reallocated + // as needed, and only freed at destruction of *this. 
+ auto string{SizedNew{handler}(bytes + 1)}; +- std::memcpy(string.get(), buffer, bytes); ++ Fortran::runtime::memcpy(string.get(), buffer, bytes); + string.get()[bytes] = '\0'; + std::printf("%s", string.get()); + return bytes; +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/ragged.cpp llvm-project-aso/flang/runtime/ragged.cpp +--- llvm-project-aso-orig/flang/runtime/ragged.cpp 2024-08-27 20:36:25.244172960 -0500 ++++ llvm-project-aso/flang/runtime/ragged.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -40,7 +40,7 @@ + std::size_t bytes{static_cast(elementSize * size)}; + header->bufferPointer = AllocateMemoryOrCrash(terminator, bytes); + if (header->bufferPointer) { +- std::memset(header->bufferPointer, 0, bytes); ++ Fortran::runtime::memset(header->bufferPointer, 0, bytes); + } + return header; + } else { +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/reduce.cpp llvm-project-aso/flang/runtime/reduce.cpp +--- llvm-project-aso-orig/flang/runtime/reduce.cpp 2024-09-24 18:07:09.519920643 -0500 ++++ llvm-project-aso/flang/runtime/reduce.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -79,16 +79,16 @@ + activeTemp_ = 1 - activeTemp_; + } else { + activeTemp_ = 0; +- std::memcpy(&*temp_[activeTemp_], operand, elementBytes_); ++ Fortran::runtime::memcpy(&*temp_[activeTemp_], operand, elementBytes_); + } + return true; + } + template + RT_API_ATTRS void GetResult(A *to, int /*zeroBasedDim*/ = -1) { + if (activeTemp_ >= 0) { +- std::memcpy(to, &*temp_[activeTemp_], elementBytes_); ++ Fortran::runtime::memcpy(to, &*temp_[activeTemp_], elementBytes_); + } else if (identity_) { +- std::memcpy(to, identity_, elementBytes_); ++ Fortran::runtime::memcpy(to, identity_, elementBytes_); + } else { + terminator_.Crash("REDUCE() without IDENTITY= has no result"); + } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/stat.cpp llvm-project-aso/flang/runtime/stat.cpp +--- llvm-project-aso-orig/flang/runtime/stat.cpp 2024-08-27 20:36:25.244172960 -0500 ++++ llvm-project-aso/flang/runtime/stat.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -84,10 +84,10 @@ + std::size_t bufferLength{errmsg->ElementBytes()}; + std::size_t msgLength{Fortran::runtime::strlen(msg)}; + if (msgLength >= bufferLength) { +- std::memcpy(buffer, msg, bufferLength); ++ Fortran::runtime::memcpy(buffer, msg, bufferLength); + } else { +- std::memcpy(buffer, msg, msgLength); +- std::memset(buffer + msgLength, ' ', bufferLength - msgLength); ++ Fortran::runtime::memcpy(buffer, msg, msgLength); ++ Fortran::runtime::memset(buffer + msgLength, ' ', bufferLength - msgLength); + } + } + } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/time-intrinsic.cpp llvm-project-aso/flang/runtime/time-intrinsic.cpp +--- llvm-project-aso-orig/flang/runtime/time-intrinsic.cpp 2024-09-13 09:46:38.870303386 -0500 ++++ llvm-project-aso/flang/runtime/time-intrinsic.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -221,13 +221,13 @@ + char *zone, std::size_t zoneChars, + const Fortran::runtime::Descriptor *values) { + if (date) { +- std::memset(date, static_cast(' '), dateChars); ++ Fortran::runtime::memset(date, static_cast(' '), dateChars); + } + if (time) { +- std::memset(time, static_cast(' '), timeChars); ++ Fortran::runtime::memset(time, static_cast(' '), timeChars); + } + if (zone) { +- std::memset(zone, static_cast(' '), zoneChars); ++ Fortran::runtime::memset(zone, static_cast(' '), zoneChars); + } + if (values) { + auto typeCode{values->type().GetCategoryAndKind()}; +@@ -365,7 +365,7 @@ + auto 
copyBufferAndPad{ + [&](char *dest, std::size_t destChars, std::size_t len) { + auto copyLen{std::min(len, destChars)}; +- std::memcpy(dest, buffer, copyLen); ++ Fortran::runtime::memcpy(dest, buffer, copyLen); + for (auto i{copyLen}; i < destChars; ++i) { + dest[i] = ' '; + } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/tools.cpp llvm-project-aso/flang/runtime/tools.cpp +--- llvm-project-aso-orig/flang/runtime/tools.cpp 2024-08-27 20:36:25.244172960 -0500 ++++ llvm-project-aso/flang/runtime/tools.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -28,7 +28,7 @@ + const char *s, std::size_t length, const Terminator &terminator) { + if (s) { + auto *p{static_cast(AllocateMemoryOrCrash(terminator, length + 1))}; +- std::memcpy(p, s, length); ++ Fortran::runtime::memcpy(p, s, length); + p[length] = '\0'; + return OwningPtr{p}; + } else { +@@ -75,10 +75,10 @@ + char *to, std::size_t toLength, const char *from) { + std::size_t len{Fortran::runtime::strlen(from)}; + if (len < toLength) { +- std::memcpy(to, from, len); +- std::memset(to + len, ' ', toLength - len); ++ Fortran::runtime::memcpy(to, from, len); ++ Fortran::runtime::memset(to + len, ' ', toLength - len); + } else { +- std::memcpy(to, from, toLength); ++ Fortran::runtime::memcpy(to, from, toLength); + } + } + +@@ -122,7 +122,7 @@ + std::size_t elementBytes{to.ElementBytes()}; + for (std::size_t n{to.Elements()}; n-- > 0; + to.IncrementSubscripts(toAt), from.IncrementSubscripts(fromAt)) { +- std::memcpy( ++ Fortran::runtime::memcpy( + to.Element(toAt), from.Element(fromAt), elementBytes); + } + } +@@ -135,7 +135,7 @@ + std::size_t elementBytes{to.ElementBytes()}; + for (std::size_t n{to.Elements()}; n-- > 0; + toAt += elementBytes, from.IncrementSubscripts(fromAt)) { +- std::memcpy(toAt, from.Element(fromAt), elementBytes); ++ Fortran::runtime::memcpy(toAt, from.Element(fromAt), elementBytes); + } + } + +@@ -147,7 +147,7 @@ + std::size_t elementBytes{to.ElementBytes()}; + for (std::size_t n{to.Elements()}; n-- > 0; + to.IncrementSubscripts(toAt), fromAt += elementBytes) { +- std::memcpy(to.Element(toAt), fromAt, elementBytes); ++ Fortran::runtime::memcpy(to.Element(toAt), fromAt, elementBytes); + } + } + +@@ -155,7 +155,7 @@ + bool toIsContiguous, bool fromIsContiguous) { + if (toIsContiguous) { + if (fromIsContiguous) { +- std::memcpy(to.OffsetElement(), from.OffsetElement(), ++ Fortran::runtime::memcpy(to.OffsetElement(), from.OffsetElement(), + to.Elements() * to.ElementBytes()); + } else { + ShallowCopyDiscontiguousToContiguous(to, from); +@@ -177,7 +177,7 @@ + char *str, std::size_t length, Terminator &terminator) { + if (runtime::memchr(str, '\0', length) == nullptr) { + char *newCmd{(char *)AllocateMemoryOrCrash(terminator, length + 1)}; +- std::memcpy(newCmd, str, length); ++ Fortran::runtime::memcpy(newCmd, str, length); + newCmd[length] = '\0'; + return newCmd; + } else { +@@ -209,7 +209,7 @@ + return ToErrmsg(errmsg, StatValueTooShort); + } + +- std::memcpy(value.OffsetElement(offset), rawValue, toCopy); ++ Fortran::runtime::memcpy(value.OffsetElement(offset), rawValue, toCopy); + + if (static_cast(rawValueLength) > toCopy) { + return ToErrmsg(errmsg, StatValueTooShort); +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/tools.h llvm-project-aso/flang/runtime/tools.h +--- llvm-project-aso-orig/flang/runtime/tools.h 2024-08-27 20:36:25.244172960 -0500 ++++ llvm-project-aso/flang/runtime/tools.h 2024-11-23 20:39:47.184175353 -0600 +@@ -521,9 +521,9 @@ + to[j] = static_cast(' '); + } + } else if 
(toChars <= fromChars) { +- std::memcpy(to, from, toChars * sizeof(TO)); ++ Fortran::runtime::memcpy(to, from, toChars * sizeof(TO)); + } else { +- std::memcpy(to, from, std::min(toChars, fromChars) * sizeof(TO)); ++ Fortran::runtime::memcpy(to, from, std::min(toChars, fromChars) * sizeof(TO)); + for (std::size_t j{fromChars}; j < toChars; ++j) { + to[j] = static_cast(' '); + } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/transformational.cpp llvm-project-aso/flang/runtime/transformational.cpp +--- llvm-project-aso-orig/flang/runtime/transformational.cpp 2024-11-23 20:25:26.851275134 -0600 ++++ llvm-project-aso/flang/runtime/transformational.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -114,7 +114,7 @@ + "not yet implemented: CHARACTER(KIND=%d) in EOSHIFT intrinsic", kind); + } + } else { +- std::memset(result.raw().base_addr, 0, bytes); ++ Fortran::runtime::memset(result.raw().base_addr, 0, bytes); + } + } + +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/unit.cpp llvm-project-aso/flang/runtime/unit.cpp +--- llvm-project-aso-orig/flang/runtime/unit.cpp 2024-08-27 20:36:25.244172960 -0500 ++++ llvm-project-aso/flang/runtime/unit.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -90,11 +90,11 @@ + CheckDirectAccess(handler); + WriteFrame(frameOffsetInFile_, recordOffsetInFrame_ + furthestAfter, handler); + if (positionInRecord > furthestPositionInRecord) { +- std::memset(Frame() + recordOffsetInFrame_ + furthestPositionInRecord, ' ', ++ Fortran::runtime::memset(Frame() + recordOffsetInFrame_ + furthestPositionInRecord, ' ', + positionInRecord - furthestPositionInRecord); + } + char *to{Frame() + recordOffsetInFrame_ + positionInRecord}; +- std::memcpy(to, data, bytes); ++ Fortran::runtime::memcpy(to, data, bytes); + if (swapEndianness_) { + SwapEndianness(to, bytes, elementBytes); + } +@@ -119,7 +119,7 @@ + auto need{recordOffsetInFrame_ + furthestAfter}; + auto got{ReadFrame(frameOffsetInFile_, need, handler)}; + if (got >= need) { +- std::memcpy(data, Frame() + recordOffsetInFrame_ + positionInRecord, bytes); ++ Fortran::runtime::memcpy(data, Frame() + recordOffsetInFrame_ + positionInRecord, bytes); + if (swapEndianness_) { + SwapEndianness(data, bytes, elementBytes); + } +@@ -303,7 +303,7 @@ + // Pad remainder of fixed length record + WriteFrame( + frameOffsetInFile_, recordOffsetInFrame_ + *openRecl, handler); +- std::memset(Frame() + recordOffsetInFrame_ + furthestPositionInRecord, ++ Fortran::runtime::memset(Frame() + recordOffsetInFrame_ + furthestPositionInRecord, + isUnformatted.value_or(false) ? 
0 : ' ', + *openRecl - furthestPositionInRecord); + furthestPositionInRecord = *openRecl; +@@ -778,7 +778,7 @@ + std::int32_t ExternalFileUnit::ReadHeaderOrFooter(std::int64_t frameOffset) { + std::int32_t word; + char *wordPtr{reinterpret_cast(&word)}; +- std::memcpy(wordPtr, Frame() + frameOffset, sizeof word); ++ Fortran::runtime::memcpy(wordPtr, Frame() + frameOffset, sizeof word); + if (swapEndianness_) { + SwapEndianness(wordPtr, sizeof word, sizeof word); + } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/unit-map.cpp llvm-project-aso/flang/runtime/unit-map.cpp +--- llvm-project-aso-orig/flang/runtime/unit-map.cpp 2024-08-27 20:36:25.244172960 -0500 ++++ llvm-project-aso/flang/runtime/unit-map.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -118,7 +118,7 @@ + for (int j{0}; j < buckets_; ++j) { + for (Chain *p{bucket_[j].get()}; p; p = p->next.get()) { + if (p->unit.path() && p->unit.pathLength() == pathLen && +- std::memcmp(p->unit.path(), path, pathLen) == 0) { ++ Fortran::runtime::memcmp(p->unit.path(), path, pathLen) == 0) { + return &p->unit; + } + } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Driver/bbc-openmp-version-macro.f90 llvm-project-aso/flang/test/Driver/bbc-openmp-version-macro.f90 +--- llvm-project-aso-orig/flang/test/Driver/bbc-openmp-version-macro.f90 2024-08-27 20:36:25.248172920 -0500 ++++ llvm-project-aso/flang/test/Driver/bbc-openmp-version-macro.f90 2024-11-23 20:39:47.184175353 -0600 +@@ -13,7 +13,7 @@ + ! RUN: bbc -fopenmp -fopenmp-version=51 -o - %s | FileCheck %s --check-prefix=OPENMP-VERSION-51 + ! RUN: bbc -fopenmp -fopenmp-version=52 -o - %s | FileCheck %s --check-prefix=OPENMP-VERSION-52 + +-! DEFAULT-OPENMP-VERSION: {{.*}} = arith.constant 199911 : i32 ++! DEFAULT-OPENMP-VERSION: {{.*}} = arith.constant 202111 : i32 + ! OPENMP-VERSION-11: {{.*}} = arith.constant 199911 : i32 + ! OPENMP-VERSION-20: {{.*}} = arith.constant 200011 : i32 + ! OPENMP-VERSION-25: {{.*}} = arith.constant 200505 : i32 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Driver/fdefault.f90 llvm-project-aso/flang/test/Driver/fdefault.f90 +--- llvm-project-aso-orig/flang/test/Driver/fdefault.f90 2024-08-27 20:36:25.248172920 -0500 ++++ llvm-project-aso/flang/test/Driver/fdefault.f90 2024-11-23 20:40:34.720003838 -0600 +@@ -24,6 +24,31 @@ + ! RUN: not %flang_fc1 -fsyntax-only -fdefault-double-8 %s 2>&1 | FileCheck %s --check-prefix=ERROR + + ! NOOPTION: integer(4),parameter::real_kind=4_4 ++! TODO: Add checks when actual codegen is possible for this family ++ ++!-------------------------- ++! FLANG DRIVER (flang-new) ++!-------------------------- ++! RUN: rm -rf %t/dir-flang-new && mkdir -p %t/dir-flang-new && %flang -fsyntax-only -module-dir %t/dir-flang-new %s 2>&1 ++! RUN: cat %t/dir-flang-new/m.mod | FileCheck %s --check-prefix=NOOPTION ++! RUN: rm -rf %t/dir-flang-new && mkdir -p %t/dir-flang-new && %flang -fsyntax-only -fdefault-real-8 -module-dir %t/dir-flang-new %s 2>&1 ++! RUN: cat %t/dir-flang-new/m.mod | FileCheck %s --check-prefix=REAL8 ++! RUN: rm -rf %t/dir-flang-new && mkdir -p %t/dir-flang-new && %flang -fsyntax-only -fdefault-real-8 -fdefault-double-8 -module-dir %t/dir-flang-new %s 2>&1 ++! RUN: cat %t/dir-flang-new/m.mod | FileCheck %s --check-prefix=DOUBLE8 ++! RUN: not %flang -fsyntax-only -fdefault-double-8 %s 2>&1 | FileCheck %s --check-prefix=ERROR ++ ++!----------------------------------------- ++! FRONTEND FLANG DRIVER (flang-new -fc1) ++!----------------------------------------- ++! 
RUN: rm -rf %t/dir-flang-new && mkdir -p %t/dir-flang-new && %flang_fc1 -fsyntax-only -module-dir %t/dir-flang-new %s 2>&1 ++! RUN: cat %t/dir-flang-new/m.mod | FileCheck %s --check-prefix=NOOPTION ++! RUN: rm -rf %t/dir-flang-new && mkdir -p %t/dir-flang-new && %flang_fc1 -fsyntax-only -fdefault-real-8 -module-dir %t/dir-flang-new %s 2>&1 ++! RUN: cat %t/dir-flang-new/m.mod | FileCheck %s --check-prefix=REAL8 ++! RUN: rm -rf %t/dir-flang-new && mkdir -p %t/dir-flang-new && %flang_fc1 -fsyntax-only -fdefault-real-8 -fdefault-double-8 -module-dir %t/dir-flang-new %s 2>&1 ++! RUN: cat %t/dir-flang-new/m.mod | FileCheck %s --check-prefix=DOUBLE8 ++! RUN: not %flang_fc1 -fsyntax-only -fdefault-double-8 %s 2>&1 | FileCheck %s --check-prefix=ERROR ++ ++! NOOPTION: integer(4),parameter::real_kind=4_4 + ! NOOPTION-NEXT: intrinsic::kind + ! NOOPTION-NEXT: integer(4),parameter::double_kind=8_4 + +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Driver/flang-openmp-version-macro.f90 llvm-project-aso/flang/test/Driver/flang-openmp-version-macro.f90 +--- llvm-project-aso-orig/flang/test/Driver/flang-openmp-version-macro.f90 2024-08-27 20:36:25.248172920 -0500 ++++ llvm-project-aso/flang/test/Driver/flang-openmp-version-macro.f90 2024-11-23 20:39:47.184175353 -0600 +@@ -13,7 +13,7 @@ + ! RUN: %flang_fc1 -fopenmp -fopenmp-version=51 -cpp -E %s | FileCheck %s --check-prefix=OPENMP-VERSION-51 + ! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -cpp -E %s | FileCheck %s --check-prefix=OPENMP-VERSION-52 + +-! DEFAULT-OPENMP-VERSION: integer :: var1 = 199911 ++! DEFAULT-OPENMP-VERSION: integer :: var1 = 202111 + ! OPENMP-VERSION-11: integer :: var1 = 199911 + ! OPENMP-VERSION-20: integer :: var1 = 200011 + ! OPENMP-VERSION-25: integer :: var1 = 200505 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir llvm-project-aso/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir +--- llvm-project-aso-orig/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir 2024-11-23 20:25:26.851275134 -0600 ++++ llvm-project-aso/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir 2024-11-23 20:39:47.188175337 -0600 +@@ -1022,7 +1022,7 @@ + %8 = fir.load %4 : !fir.ref + %9 = arith.addi %8, %c20_i32 : i32 + fir.store %9 to %7 : !fir.ref +- omp.terminator ++ omp.terminator + } + return + } +@@ -1059,7 +1059,7 @@ + %9 = fir.load %arg0 : !fir.ref + %10 = arith.muli %9, %c10_i32 : i32 + fir.store %10 to %arg1 : !fir.ref +- omp.terminator ++ omp.terminator } return } -@@ -536,6 +539,7 @@ - // CHECK: ^bb3: - // CHECK: omp.yield - // CHECK: } -+// CHECK: omp.terminator - // CHECK: } - // CHECK: llvm.return - // CHECK: } -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Integration/OpenMP/target-filtering.f90 llvm-project/flang/test/Integration/OpenMP/target-filtering.f90 ---- llvm-project.orig/flang/test/Integration/OpenMP/target-filtering.f90 2024-02-15 09:48:32.619800701 -0600 -+++ llvm-project/flang/test/Integration/OpenMP/target-filtering.f90 2024-06-12 10:44:09.351614239 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90 llvm-project-aso/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90 +--- llvm-project-aso-orig/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90 2024-10-18 17:40:32.532992003 -0500 ++++ llvm-project-aso/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90 2024-11-23 20:39:47.188175337 -0600 +@@ -72,80 +72,80 @@ + ! 
CHECK-NEXT: br label %omp.private.copy + + ! CHECK: omp.private.copy: ; preds = %omp.region.cont +-! CHECK-NEXT: br label %omp.private.copy10 ++! CHECK-NEXT: br label %omp.private.copy11 + +-! CHECK: omp.private.copy10: ; preds = %omp.private.copy ++! CHECK: omp.private.copy11: ; preds = %omp.private.copy + ! [begin firstprivate copy for first var] + ! [read the length, is it non-zero?] +-! CHECK: br i1 %{{.*}}, label %omp.private.copy11, label %omp.private.copy12 ++! CHECK: br i1 %{{.*}}, label %omp.private.copy12, label %omp.private.copy13 + +-! CHECK: omp.private.copy12: ; preds = %omp.private.copy11, %omp.private.copy10 +-! CHECK-NEXT: br label %omp.region.cont9 ++! CHECK: omp.private.copy13: ; preds = %omp.private.copy12, %omp.private.copy11 ++! CHECK-NEXT: br label %omp.region.cont10 + +-! CHECK: omp.region.cont9: ; preds = %omp.private.copy12 ++! CHECK: omp.region.cont10: ; preds = %omp.private.copy13 + ! CHECK-NEXT: %{{.*}} = phi ptr +-! CHECK-NEXT: br label %omp.private.copy14 ++! CHECK-NEXT: br label %omp.private.copy15 + +-! CHECK: omp.private.copy14: ; preds = %omp.region.cont9 ++! CHECK: omp.private.copy15: ; preds = %omp.region.cont10 + ! [begin firstprivate copy for second var] + ! [read the length, is it non-zero?] +-! CHECK: br i1 %{{.*}}, label %omp.private.copy15, label %omp.private.copy16 ++! CHECK: br i1 %{{.*}}, label %omp.private.copy16, label %omp.private.copy17 + +-! CHECK: omp.private.copy16: ; preds = %omp.private.copy15, %omp.private.copy14 +-! CHECK-NEXT: br label %omp.region.cont13 ++! CHECK: omp.private.copy17: ; preds = %omp.private.copy16, %omp.private.copy15 ++! CHECK-NEXT: br label %omp.region.cont14 + +-! CHECK: omp.region.cont13: ; preds = %omp.private.copy16 ++! CHECK: omp.region.cont14: ; preds = %omp.private.copy17 + ! CHECK-NEXT: %{{.*}} = phi ptr + ! CHECK-NEXT: br label %omp.reduction.init + +-! CHECK: omp.reduction.init: ; preds = %omp.region.cont13 ++! CHECK: omp.reduction.init: ; preds = %omp.region.cont14 + ! [deffered stores for results of reduction alloc regions] + ! CHECK: br label %[[VAL_96:.*]] + + ! CHECK: omp.reduction.neutral: ; preds = %omp.reduction.init + ! [start of reduction initialization region] + ! [null check:] +-! CHECK: br i1 %{{.*}}, label %omp.reduction.neutral18, label %omp.reduction.neutral19 ++! CHECK: br i1 %{{.*}}, label %omp.reduction.neutral19, label %omp.reduction.neutral20 + +-! CHECK: omp.reduction.neutral19: ; preds = %omp.reduction.neutral ++! CHECK: omp.reduction.neutral20: ; preds = %omp.reduction.neutral + ! [malloc and assign the default value to the reduction variable] +-! CHECK: br label %omp.reduction.neutral20 ++! CHECK: br label %omp.reduction.neutral21 + +-! CHECK: omp.reduction.neutral20: ; preds = %omp.reduction.neutral18, %omp.reduction.neutral19 +-! CHECK-NEXT: br label %omp.region.cont17 ++! CHECK: omp.reduction.neutral21: ; preds = %omp.reduction.neutral19, %omp.reduction.neutral20 ++! CHECK-NEXT: br label %omp.region.cont18 + +-! CHECK: omp.region.cont17: ; preds = %omp.reduction.neutral20 ++! CHECK: omp.region.cont18: ; preds = %omp.reduction.neutral21 + ! CHECK-NEXT: %{{.*}} = phi ptr +-! CHECK-NEXT: br label %omp.reduction.neutral22 ++! CHECK-NEXT: br label %omp.reduction.neutral23 + +-! CHECK: omp.reduction.neutral22: ; preds = %omp.region.cont17 ++! CHECK: omp.reduction.neutral23: ; preds = %omp.region.cont18 + ! [start of reduction initialization region] + ! [null check:] +-! CHECK: br i1 %{{.*}}, label %omp.reduction.neutral23, label %omp.reduction.neutral24 ++! 
CHECK: br i1 %{{.*}}, label %omp.reduction.neutral24, label %omp.reduction.neutral25 + +-! CHECK: omp.reduction.neutral24: ; preds = %omp.reduction.neutral22 ++! CHECK: omp.reduction.neutral25: ; preds = %omp.reduction.neutral23 + ! [malloc and assign the default value to the reduction variable] +-! CHECK: br label %omp.reduction.neutral25 ++! CHECK: br label %omp.reduction.neutral26 + +-! CHECK: omp.reduction.neutral25: ; preds = %omp.reduction.neutral23, %omp.reduction.neutral24 +-! CHECK-NEXT: br label %omp.region.cont21 ++! CHECK: omp.reduction.neutral26: ; preds = %omp.reduction.neutral24, %omp.reduction.neutral25 ++! CHECK-NEXT: br label %omp.region.cont22 + +-! CHECK: omp.region.cont21: ; preds = %omp.reduction.neutral25 ++! CHECK: omp.region.cont22: ; preds = %omp.reduction.neutral26 + ! CHECK-NEXT: %{{.*}} = phi ptr + ! CHECK-NEXT: br label %omp.par.region + +-! CHECK: omp.par.region: ; preds = %omp.region.cont21 +-! CHECK-NEXT: br label %omp.par.region27 ++! CHECK: omp.par.region: ; preds = %omp.region.cont22 ++! CHECK-NEXT: br label %omp.par.region28 + +-! CHECK: omp.par.region27: ; preds = %omp.par.region ++! CHECK: omp.par.region28: ; preds = %omp.par.region + ! [call SUM runtime function] + ! [if (sum(a) == 1)] +-! CHECK: br i1 %{{.*}}, label %omp.par.region28, label %omp.par.region29 ++! CHECK: br i1 %{{.*}}, label %omp.par.region29, label %omp.par.region30 + +-! CHECK: omp.par.region29: ; preds = %omp.par.region27 +-! CHECK-NEXT: br label %omp.region.cont26 ++! CHECK: omp.par.region30: ; preds = %omp.par.region28 ++! CHECK-NEXT: br label %omp.region.cont27 + +-! CHECK: omp.region.cont26: ; preds = %omp.par.region28, %omp.par.region29 ++! CHECK: omp.region.cont27: ; preds = %omp.par.region29, %omp.par.region30 + ! [omp parallel region done, call into the runtime to complete reduction] + ! CHECK: %[[VAL_233:.*]] = call i32 @__kmpc_reduce( + ! CHECK: switch i32 %[[VAL_233]], label %reduce.finalize [ +@@ -153,16 +153,16 @@ + ! CHECK-NEXT: i32 2, label %reduce.switch.atomic + ! CHECK-NEXT: ] + +-! CHECK: reduce.switch.atomic: ; preds = %omp.region.cont26 ++! CHECK: reduce.switch.atomic: ; preds = %omp.region.cont27 + ! CHECK-NEXT: unreachable + +-! CHECK: reduce.switch.nonatomic: ; preds = %omp.region.cont26 ++! CHECK: reduce.switch.nonatomic: ; preds = %omp.region.cont27 + ! CHECK-NEXT: %[[red_private_value_0:.*]] = load ptr, ptr %{{.*}}, align 8 + ! CHECK-NEXT: br label %omp.reduction.nonatomic.body + + ! [various blocks implementing the reduction] + +-! CHECK: omp.region.cont35: ; preds = ++! CHECK: omp.region.cont36: ; preds = + ! CHECK-NEXT: %{{.*}} = phi ptr + ! CHECK-NEXT: call void @__kmpc_end_reduce( + ! CHECK-NEXT: br label %reduce.finalize +@@ -176,79 +176,79 @@ + + ! CHECK: omp.reduction.cleanup: ; preds = %omp.par.pre_finalize + ! [null check] +-! CHECK: br i1 %{{.*}}, label %omp.reduction.cleanup41, label %omp.reduction.cleanup42 ++! CHECK: br i1 %{{.*}}, label %omp.reduction.cleanup42, label %omp.reduction.cleanup43 + +-! CHECK: omp.reduction.cleanup42: ; preds = %omp.reduction.cleanup41, %omp.reduction.cleanup +-! CHECK-NEXT: br label %omp.region.cont40 ++! CHECK: omp.reduction.cleanup43: ; preds = %omp.reduction.cleanup42, %omp.reduction.cleanup ++! CHECK-NEXT: br label %omp.region.cont41 + +-! CHECK: omp.region.cont40: ; preds = %omp.reduction.cleanup42 ++! CHECK: omp.region.cont41: ; preds = %omp.reduction.cleanup43 + ! CHECK-NEXT: %{{.*}} = load ptr, ptr +-! CHECK-NEXT: br label %omp.reduction.cleanup44 ++! CHECK-NEXT: br label %omp.reduction.cleanup45 + +-! 
CHECK: omp.reduction.cleanup44: ; preds = %omp.region.cont40 ++! CHECK: omp.reduction.cleanup45: ; preds = %omp.region.cont41 + ! [null check] +-! CHECK: br i1 %{{.*}}, label %omp.reduction.cleanup45, label %omp.reduction.cleanup46 ++! CHECK: br i1 %{{.*}}, label %omp.reduction.cleanup46, label %omp.reduction.cleanup47 + +-! CHECK: omp.reduction.cleanup46: ; preds = %omp.reduction.cleanup45, %omp.reduction.cleanup44 +-! CHECK-NEXT: br label %omp.region.cont43 ++! CHECK: omp.reduction.cleanup47: ; preds = %omp.reduction.cleanup46, %omp.reduction.cleanup45 ++! CHECK-NEXT: br label %omp.region.cont44 + +-! CHECK: omp.region.cont43: ; preds = %omp.reduction.cleanup46 ++! CHECK: omp.region.cont44: ; preds = %omp.reduction.cleanup47 + ! CHECK-NEXT: br label %omp.private.dealloc + +-! CHECK: omp.private.dealloc: ; preds = %omp.region.cont43 ++! CHECK: omp.private.dealloc: ; preds = %omp.region.cont44 + ! [null check] +-! CHECK: br i1 %{{.*}}, label %omp.private.dealloc48, label %omp.private.dealloc49 ++! CHECK: br i1 %{{.*}}, label %omp.private.dealloc49, label %omp.private.dealloc50 + +-! CHECK: omp.private.dealloc49: ; preds = %omp.private.dealloc48, %omp.private.dealloc +-! CHECK-NEXT: br label %omp.region.cont47 ++! CHECK: omp.private.dealloc50: ; preds = %omp.private.dealloc49, %omp.private.dealloc ++! CHECK-NEXT: br label %omp.region.cont48 + +-! CHECK: omp.region.cont47: ; preds = %omp.private.dealloc49 +-! CHECK-NEXT: br label %omp.private.dealloc51 ++! CHECK: omp.region.cont48: ; preds = %omp.private.dealloc50 ++! CHECK-NEXT: br label %omp.private.dealloc52 + +-! CHECK: omp.private.dealloc51: ; preds = %omp.region.cont47 ++! CHECK: omp.private.dealloc52: ; preds = %omp.region.cont48 + ! [null check] +-! CHECK: br i1 %{{.*}}, label %omp.private.dealloc52, label %omp.private.dealloc53 ++! CHECK: br i1 %{{.*}}, label %omp.private.dealloc53, label %omp.private.dealloc54 + +-! CHECK: omp.private.dealloc53: ; preds = %omp.private.dealloc52, %omp.private.dealloc51 +-! CHECK-NEXT: br label %omp.region.cont50 ++! CHECK: omp.private.dealloc54: ; preds = %omp.private.dealloc53, %omp.private.dealloc52 ++! CHECK-NEXT: br label %omp.region.cont51 + +-! CHECK: omp.region.cont50: ; preds = %omp.private.dealloc53 ++! CHECK: omp.region.cont51: ; preds = %omp.private.dealloc54 + ! CHECK-NEXT: br label %omp.par.outlined.exit.exitStub + +-! CHECK: omp.private.dealloc52: ; preds = %omp.private.dealloc51 ++! CHECK: omp.private.dealloc53: ; preds = %omp.private.dealloc52 + ! [dealloc memory] +-! CHECK: br label %omp.private.dealloc53 ++! CHECK: br label %omp.private.dealloc54 + +-! CHECK: omp.private.dealloc48: ; preds = %omp.private.dealloc ++! CHECK: omp.private.dealloc49: ; preds = %omp.private.dealloc + ! [dealloc memory] +-! CHECK: br label %omp.private.dealloc49 ++! CHECK: br label %omp.private.dealloc50 + +-! CHECK: omp.reduction.cleanup45: ; preds = %omp.reduction.cleanup44 ++! CHECK: omp.reduction.cleanup46: ; preds = %omp.reduction.cleanup45 + ! CHECK-NEXT: call void @free( +-! CHECK-NEXT: br label %omp.reduction.cleanup46 ++! CHECK-NEXT: br label %omp.reduction.cleanup47 + +-! CHECK: omp.reduction.cleanup41: ; preds = %omp.reduction.cleanup ++! CHECK: omp.reduction.cleanup42: ; preds = %omp.reduction.cleanup + ! CHECK-NEXT: call void @free( +-! CHECK-NEXT: br label %omp.reduction.cleanup42 ++! CHECK-NEXT: br label %omp.reduction.cleanup43 + +-! CHECK: omp.par.region28: ; preds = %omp.par.region27 ++! CHECK: omp.par.region29: ; preds = %omp.par.region28 + ! 
CHECK-NEXT: call {} @_FortranAStopStatement + +-! CHECK: omp.reduction.neutral23: ; preds = %omp.reduction.neutral22 ++! CHECK: omp.reduction.neutral24: ; preds = %omp.reduction.neutral23 + ! [source length was zero: finish initializing array] +-! CHECK: br label %omp.reduction.neutral25 ++! CHECK: br label %omp.reduction.neutral26 + +-! CHECK: omp.reduction.neutral18: ; preds = %omp.reduction.neutral ++! CHECK: omp.reduction.neutral19: ; preds = %omp.reduction.neutral + ! [source length was zero: finish initializing array] +-! CHECK: br label %omp.reduction.neutral20 ++! CHECK: br label %omp.reduction.neutral21 + +-! CHECK: omp.private.copy15: ; preds = %omp.private.copy14 ++! CHECK: omp.private.copy16: ; preds = %omp.private.copy15 + ! [source length was non-zero: call assign runtime] +-! CHECK: br label %omp.private.copy16 ++! CHECK: br label %omp.private.copy17 + +-! CHECK: omp.private.copy11: ; preds = %omp.private.copy10 ++! CHECK: omp.private.copy12: ; preds = %omp.private.copy11 + ! [source length was non-zero: call assign runtime] +-! CHECK: br label %omp.private.copy12 ++! CHECK: br label %omp.private.copy13 + + ! CHECK: omp.private.alloc1: ; preds = %omp.private.alloc + ! [var extent was non-zero: malloc a private array] +@@ -258,5 +258,5 @@ + ! [var extent was non-zero: malloc a private array] + ! CHECK: br label %omp.private.alloc8 + +-! CHECK: omp.par.outlined.exit.exitStub: ; preds = %omp.region.cont50 ++! CHECK: omp.par.outlined.exit.exitStub: ; preds = %omp.region.cont51 + ! CHECK-NEXT: ret void +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Integration/OpenMP/target-filtering.f90 llvm-project-aso/flang/test/Integration/OpenMP/target-filtering.f90 +--- llvm-project-aso-orig/flang/test/Integration/OpenMP/target-filtering.f90 2024-08-27 20:36:25.268172720 -0500 ++++ llvm-project-aso/flang/test/Integration/OpenMP/target-filtering.f90 2024-11-23 20:39:47.188175337 -0600 @@ -7,7 +7,7 @@ !===----------------------------------------------------------------------===! @@ -7448,1776 +8732,395 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Integration/OpenMP/ta !HOST: define {{.*}}@{{.*}}before{{.*}}( !DEVICE-NOT: define {{.*}}@before{{.*}}( -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/FIR/mismatched-bound-types.f90 llvm-project/flang/test/Lower/OpenMP/FIR/mismatched-bound-types.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/FIR/mismatched-bound-types.f90 1969-12-31 18:00:00.000000000 -0600 -+++ llvm-project/flang/test/Lower/OpenMP/FIR/mismatched-bound-types.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -0,0 +1,23 @@ -+! 
RUN: %flang_fc1 -fopenmp -emit-fir %s -o - | FileCheck %s +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenACC/acc-enter-data.f90 llvm-project-aso/flang/test/Lower/OpenACC/acc-enter-data.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenACC/acc-enter-data.f90 2024-08-27 20:36:25.288172520 -0500 ++++ llvm-project-aso/flang/test/Lower/OpenACC/acc-enter-data.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -808,10 +808,10 @@ + !CHECK: %[[VAL_42:.*]] = arith.constant 1 : index + !CHECK: %[[VAL_43:.*]] = arith.constant 1 : index + !CHECK: %[[VAL_44:.*]] = arith.subi %[[VAL_43]], %[[VAL_38]]#0 : index +-!CHECK: %[[VAL_45:.*]] = acc.bounds lowerbound(%[[VAL_44]] : index) upperbound(%[[VAL_44]] : index) extent(%[[VAL_42]] : index) stride(%[[VAL_42]] : index) startIdx(%[[VAL_38]]#0 : index) ++!CHECK: %[[VAL_45:.*]] = acc.bounds lowerbound(%[[VAL_44]] : index) upperbound(%[[VAL_44]] : index) extent(%[[VAL_38]]#1 : index) stride(%[[VAL_42]] : index) startIdx(%[[VAL_38]]#0 : index) + !CHECK: %[[VAL_46:.*]] = arith.constant 2 : index + !CHECK: %[[VAL_47:.*]] = arith.subi %[[VAL_46]], %[[VAL_40]]#0 : index +-!CHECK: %[[VAL_48:.*]] = acc.bounds lowerbound(%[[VAL_47]] : index) upperbound(%[[VAL_47]] : index) extent(%[[VAL_42]] : index) stride(%[[VAL_42]] : index) startIdx(%[[VAL_40]]#0 : index) ++!CHECK: %[[VAL_48:.*]] = acc.bounds lowerbound(%[[VAL_47]] : index) upperbound(%[[VAL_47]] : index) extent(%[[VAL_40]]#1 : index) stride(%[[VAL_42]] : index) startIdx(%[[VAL_40]]#0 : index) + !CHECK: %[[CREATE:.*]] = acc.create varPtr(%[[VAL_41]] : !fir.heap>) bounds(%[[VAL_45]], %[[VAL_48]]) -> !fir.heap> {name = "e(2_8)%a(1,2)", structured = false} + !CHECK: acc.enter_data dataOperands(%[[CREATE]] : !fir.heap>) + +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/array-bounds.f90 llvm-project-aso/flang/test/Lower/OpenMP/array-bounds.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/array-bounds.f90 2024-11-23 20:25:26.851275134 -0600 ++++ llvm-project-aso/flang/test/Lower/OpenMP/array-bounds.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -65,13 +65,15 @@ + end subroutine assumed_shape_array + + ++ ++ + !HOST-LABEL: func.func @_QMassumed_array_routinesPassumed_size_array( + !HOST-SAME: %[[ARG0:.*]]: !fir.ref> {fir.bindc_name = "arr_read_write"}) { + !HOST: %[[ARG0_SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1> + !HOST: %[[ARG0_DECL:.*]]:2 = hlfir.declare %[[ARG0]](%[[ARG0_SHAPE]]) dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QMassumed_array_routinesFassumed_size_arrayEarr_read_write"} : (!fir.ref>, !fir.shape<1>, !fir.dscope) -> (!fir.box>, !fir.ref>) + !HOST: %[[ALLOCA:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QMassumed_array_routinesFassumed_size_arrayEi"} + !HOST: %[[DIMS0:.*]]:3 = fir.box_dims %[[ARG0_DECL]]#0, %c0{{.*}} : (!fir.box>, index) -> (index, index, index) +-!HOST: %[[C4_1:.*]] = arith.subi %c4, %c1{{.*}} : index ++!HOST: %[[C4_1:.*]] = arith.subi %c4{{.*}}, %c1{{.*}} : index + !HOST: %[[EXT:.*]] = arith.addi %[[C4_1]], %c1{{.*}} : index + !HOST: %[[BOUNDS:.*]] = omp.map.bounds lower_bound(%c1{{.*}} : index) upper_bound(%c4{{.*}} : index) extent(%[[EXT]] : index) stride(%[[DIMS0]]#2 : index) start_idx(%c1{{.*}} : index) {stride_in_bytes = true} + !HOST: %[[MAP:.*]] = omp.map.info var_ptr(%[[ARG0_DECL]]#1 : !fir.ref>, i32) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "arr_read_write(2:5)"} +diff -Naur -x .git -x '*.pyc' 
llvm-project-aso-orig/flang/test/Lower/OpenMP/eval-outside-target.f90 llvm-project-aso/flang/test/Lower/OpenMP/eval-outside-target.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/eval-outside-target.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Lower/OpenMP/eval-outside-target.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,157 @@ ++! The "thread_limit" clause was added to the "target" construct in OpenMP 5.1. ++! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 %s -o - | FileCheck %s --check-prefixes=BOTH,HOST ++! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefixes=BOTH,DEVICE ++ ++! BOTH-LABEL: func.func @_QPteams ++subroutine teams() ++ ! BOTH: omp.target ++ ++ ! HOST-SAME: host_eval(%{{.*}} -> %[[NUM_TEAMS:.*]], %{{.*}} -> %[[THREAD_LIMIT:.*]] : i32, i32) ++ ++ ! DEVICE-NOT: host_eval({{.*}}) ++ ! DEVICE-SAME: { ++ !$omp target + -+! Check that this testcase is lowered to FIR successfully. -+! CHECK: omp.target trip_count ++ ! BOTH: omp.teams + -+module Test -+ use, intrinsic :: ISO_Fortran_env, only: REAL64,INT64 -+ implicit none -+ integer(kind=INT64) :: N -+ real(kind=REAL64), allocatable :: A(:) ++ ! HOST-SAME: num_teams( to %[[NUM_TEAMS]] : i32) thread_limit(%[[THREAD_LIMIT]] : i32) ++ ! DEVICE-SAME: num_teams({{.*}}) thread_limit({{.*}}) ++ !$omp teams num_teams(1) thread_limit(2) ++ call foo() ++ !$omp end teams + -+ contains -+ subroutine init_arrays(initA) -+ implicit none -+ real(kind=REAL64), intent(in) :: initA -+ integer(kind=INT64) :: i -+ !$omp target teams distribute parallel do -+ do i = 1, N -+ A(i) = initA -+ end do -+ end subroutine init_arrays ++ !$omp end target + -+end module Test -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/function-filtering-2.f90 llvm-project/flang/test/Lower/OpenMP/function-filtering-2.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/function-filtering-2.f90 2024-06-12 10:43:12.676209906 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/function-filtering-2.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -1,9 +1,9 @@ - ! RUN: %flang_fc1 -fopenmp -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM,LLVM-HOST %s - ! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - | FileCheck --check-prefix=MLIR %s --! RUN: %flang_fc1 -fopenmp -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM,LLVM-DEVICE %s --! RUN: %flang_fc1 -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefix=MLIR %s -+! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM,LLVM-DEVICE %s -+! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefix=MLIR %s - ! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-HOST,MLIR-ALL %s --! RUN: bbc -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s -+! RUN: bbc -target amdgcn-amd-amdhsa -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s - - ! MLIR: func.func @{{.*}}implicit_invocation() attributes {omp.declare_target = #omp.declaretarget} - ! 
MLIR: return -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/function-filtering-3.f90 llvm-project/flang/test/Lower/OpenMP/function-filtering-3.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/function-filtering-3.f90 2024-06-12 10:43:12.676209906 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/function-filtering-3.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -1,9 +1,9 @@ - ! RUN: %flang_fc1 -fopenmp -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-HOST,LLVM-ALL %s - ! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-HOST,MLIR-ALL %s --! RUN: %flang_fc1 -fopenmp -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-DEVICE,LLVM-ALL %s --! RUN: %flang_fc1 -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s -+! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-DEVICE,LLVM-ALL %s -+! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s - ! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-HOST,MLIR-ALL %s --! RUN: bbc -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s -+! RUN: bbc -fopenmp -target amdgcn-amd-amdhsa -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s - - ! Check that the correct LLVM IR functions are kept for the host and device - ! after running the whole set of translation and transformation passes from -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/function-filtering.f90 llvm-project/flang/test/Lower/OpenMP/function-filtering.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/function-filtering.f90 2024-02-15 09:48:32.631800569 -0600 -+++ llvm-project/flang/test/Lower/OpenMP/function-filtering.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -1,9 +1,9 @@ - ! RUN: %flang_fc1 -fopenmp -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-HOST,LLVM-ALL %s - ! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-HOST,MLIR-ALL %s --! RUN: %flang_fc1 -fopenmp -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-DEVICE,LLVM-ALL %s --! RUN: %flang_fc1 -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s -+! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-DEVICE,LLVM-ALL %s -+! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s - ! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-HOST,MLIR-ALL %s --! RUN: bbc -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s -+! RUN: bbc -target amdgcn-amd-amdhsa -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s - - ! Check that the correct LLVM IR functions are kept for the host and device - ! 
after running the whole set of translation and transformation passes from -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/if-clause.f90 llvm-project/flang/test/Lower/OpenMP/if-clause.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/if-clause.f90 2024-06-12 10:43:12.676209906 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/if-clause.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -9,23 +9,191 @@ - integer :: i - - ! TODO When they are supported, add tests for: -- ! - DISTRIBUTE PARALLEL DO -- ! - DISTRIBUTE PARALLEL DO SIMD -- ! - DISTRIBUTE SIMD - ! - PARALLEL SECTIONS - ! - PARALLEL WORKSHARE -- ! - TARGET PARALLEL -- ! - TARGET TEAMS DISTRIBUTE -- ! - TARGET TEAMS DISTRIBUTE PARALLEL DO -- ! - TARGET TEAMS DISTRIBUTE PARALLEL DO SIMD -- ! - TARGET TEAMS DISTRIBUTE SIMD -- ! - TARGET UPDATE - ! - TASKLOOP - ! - TASKLOOP SIMD -- ! - TEAMS DISTRIBUTE -- ! - TEAMS DISTRIBUTE PARALLEL DO -- ! - TEAMS DISTRIBUTE PARALLEL DO SIMD -- ! - TEAMS DISTRIBUTE SIMD ++ ! BOTH: omp.teams ++ ! BOTH-SAME: num_teams({{.*}}) thread_limit({{.*}}) { ++ !$omp teams num_teams(1) thread_limit(2) ++ call foo() ++ !$omp end teams ++end subroutine teams + -+ ! ---------------------------------------------------------------------------- -+ ! DISTRIBUTE PARALLEL DO SIMD -+ ! ---------------------------------------------------------------------------- -+ !$omp teams ++! BOTH-LABEL: func.func @_QPdistribute_parallel_do ++subroutine distribute_parallel_do() ++ ! BOTH: omp.target ++ ++ ! HOST-SAME: host_eval(%{{.*}} -> %[[LB:.*]], %{{.*}} -> %[[UB:.*]], %{{.*}} -> %[[STEP:.*]], %{{.*}} -> %[[NUM_THREADS:.*]] : i32, i32, i32, i32) ++ ++ ! DEVICE-NOT: host_eval({{.*}}) ++ ! DEVICE-SAME: { + -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp distribute parallel do simd -+ do i = 1, 10 -+ end do -+ !$omp end distribute parallel do simd ++ ! BOTH: omp.teams ++ !$omp target teams + -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp distribute parallel do simd if(.true.) -+ do i = 1, 10 -+ end do -+ !$omp end distribute parallel do simd ++ ! BOTH: omp.parallel + -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp distribute parallel do simd if(parallel: .true.) if(simd: .false.) -+ do i = 1, 10 -+ end do -+ !$omp end distribute parallel do simd ++ ! HOST-SAME: num_threads(%[[NUM_THREADS]] : i32) ++ ! DEVICE-SAME: num_threads({{.*}}) + -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp distribute parallel do simd if(parallel: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end distribute parallel do simd -+ -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! 
CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp distribute parallel do simd if(simd: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end distribute parallel do simd -+ -+ !$omp end teams -+ -+ ! ---------------------------------------------------------------------------- -+ ! DISTRIBUTE PARALLEL DO -+ ! ---------------------------------------------------------------------------- -+ !$omp teams -+ -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp distribute parallel do -+ do i = 1, 10 -+ end do -+ !$omp end distribute parallel do ++ ! BOTH: omp.distribute ++ ! BOTH-NEXT: omp.wsloop ++ ! BOTH-NEXT: omp.loop_nest + -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp distribute parallel do if(.true.) -+ do i = 1, 10 ++ ! HOST-SAME: (%{{.*}}) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) ++ !$omp distribute parallel do num_threads(1) ++ do i=1,10 ++ call foo() + end do + !$omp end distribute parallel do -+ -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp distribute parallel do if(parallel: .true.) -+ do i = 1, 10 ++ !$omp end target teams ++ ++ ! BOTH: omp.target ++ ! BOTH-NOT: host_eval({{.*}}) ++ ! BOTH-SAME: { ++ ! BOTH: omp.teams ++ !$omp target teams ++ call foo() !< Prevents this from being SPMD. ++ ++ ! BOTH: omp.parallel ++ ! BOTH-SAME: num_threads({{.*}}) ++ ! BOTH: omp.distribute ++ ! BOTH-NEXT: omp.wsloop ++ !$omp distribute parallel do num_threads(1) ++ do i=1,10 ++ call foo() + end do + !$omp end distribute parallel do ++ !$omp end target teams + -+ !$omp end teams -+ -+ ! ---------------------------------------------------------------------------- -+ ! DISTRIBUTE SIMD -+ ! ---------------------------------------------------------------------------- ++ ! BOTH: omp.teams + !$omp teams -+ -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp distribute simd -+ do i = 1, 10 -+ end do -+ !$omp end distribute simd -+ -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp distribute simd if(.true.) -+ do i = 1, 10 -+ end do -+ !$omp end distribute simd -+ -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp distribute simd if(simd: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end distribute simd -+ -+ !$omp end teams - - ! ---------------------------------------------------------------------------- - ! DO SIMD -@@ -33,18 +201,31 @@ - ! CHECK: omp.wsloop - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { - !$omp do simd - do i = 1, 10 - end do - !$omp end do simd - - ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! 
CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp do simd if(.true.) - do i = 1, 10 - end do - !$omp end do simd - - ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp do simd if(simd: .true.) - do i = 1, 10 - end do -@@ -62,12 +243,14 @@ - - ! CHECK: omp.parallel - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp parallel if(.true.) - i = 10 - !$omp end parallel - - ! CHECK: omp.parallel - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp parallel if(parallel: .true.) - i = 10 - !$omp end parallel -@@ -78,6 +261,9 @@ - ! CHECK: omp.parallel - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { - !$omp parallel do - do i = 1, 10 - end do -@@ -85,6 +271,10 @@ - - ! CHECK: omp.parallel - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { - !$omp parallel do if(.true.) - do i = 1, 10 - end do -@@ -92,6 +282,10 @@ - - ! CHECK: omp.parallel - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { - !$omp parallel do if(parallel: .true.) - do i = 1, 10 - end do -@@ -106,6 +300,9 @@ - ! CHECK: omp.wsloop - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { - !$omp parallel do simd - do i = 1, 10 - end do -@@ -113,7 +310,13 @@ - - ! CHECK: omp.parallel - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp parallel do simd if(.true.) - do i = 1, 10 - end do -@@ -121,7 +324,13 @@ - - ! CHECK: omp.parallel - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp parallel do simd if(parallel: .true.) if(simd: .false.) - do i = 1, 10 - end do -@@ -132,6 +341,9 @@ - ! CHECK: omp.wsloop - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { - !$omp parallel do simd if(parallel: .true.) - do i = 1, 10 - end do -@@ -141,6 +353,11 @@ - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { - ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp parallel do simd if(simd: .true.) - do i = 1, 10 - end do -@@ -159,6 +376,7 @@ - - ! CHECK: omp.simd - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp simd if(.true.) - do i = 1, 10 - end do -@@ -166,6 +384,7 @@ - - ! CHECK: omp.simd - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp simd if(simd: .true.) - do i = 1, 10 - end do -@@ -182,11 +401,13 @@ - - ! CHECK: omp.target - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target if(.true.) - !$omp end target - - ! CHECK: omp.target - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target if(target: .true.) - !$omp end target - -@@ -201,11 +422,13 @@ - - ! CHECK: omp.target_data - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target data map(tofrom: i) if(.true.) - !$omp end target data - - ! CHECK: omp.target_data - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target data map(tofrom: i) if(target data: .true.) - !$omp end target data - -@@ -214,7 +437,6 @@ - ! ---------------------------------------------------------------------------- - ! 
CHECK: omp.target_enter_data - ! CHECK-NOT: if({{.*}}) -- ! CHECK-SAME: map - !$omp target enter data map(to: i) - - ! CHECK: omp.target_enter_data -@@ -230,7 +452,6 @@ - ! ---------------------------------------------------------------------------- - ! CHECK: omp.target_exit_data - ! CHECK-NOT: if({{.*}}) -- ! CHECK-SAME: map - !$omp target exit data map(from: i) - - ! CHECK: omp.target_exit_data -@@ -250,6 +471,9 @@ - ! CHECK: omp.parallel - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target parallel do - do i = 1, 10 - end do -@@ -257,8 +481,13 @@ - - ! CHECK: omp.target - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.parallel - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target parallel do if(.true.) - do i = 1, 10 - end do -@@ -266,8 +495,13 @@ - - ! CHECK: omp.target - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.parallel - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target parallel do if(target: .true.) if(parallel: .false.) - do i = 1, 10 - end do -@@ -275,9 +509,13 @@ - - ! CHECK: omp.target - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.parallel - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target parallel do if(target: .true.) - do i = 1, 10 - end do -@@ -288,6 +526,10 @@ - ! CHECK-SAME: { - ! CHECK: omp.parallel - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target parallel do if(parallel: .true.) - do i = 1, 10 - end do -@@ -305,6 +547,9 @@ - ! CHECK: omp.wsloop - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target parallel do simd - do i = 1, 10 - end do -@@ -312,9 +557,16 @@ - - ! CHECK: omp.target - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.parallel - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target parallel do simd if(.true.) - do i = 1, 10 - end do -@@ -322,9 +574,16 @@ - - ! CHECK: omp.target - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.parallel - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target parallel do simd if(target: .true.) if(parallel: .false.) & - !$omp& if(simd: .true.) - do i = 1, 10 -@@ -333,12 +592,16 @@ - - ! CHECK: omp.target - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.parallel - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { - ! CHECK: omp.wsloop - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target parallel do simd if(target: .true.) - do i = 1, 10 - end do -@@ -349,13 +612,72 @@ - ! CHECK-SAME: { - ! CHECK: omp.parallel - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target parallel do simd if(parallel: .true.) if(simd: .false.) - do i = 1, 10 - end do - !$omp end target parallel do simd - - ! 
---------------------------------------------------------------------------- -+ ! TARGET PARALLEL -+ ! ---------------------------------------------------------------------------- -+ ! CHECK: omp.target -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target parallel -+ i = 1 -+ !$omp end target parallel -+ -+ ! CHECK: omp.target -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target parallel if(.true.) -+ i = 1 -+ !$omp end target parallel -+ -+ ! CHECK: omp.target -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target parallel if(target: .true.) if(parallel: .false.) -+ i = 1 -+ !$omp end target parallel -+ -+ ! CHECK: omp.target -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target parallel if(target: .true.) -+ i = 1 -+ !$omp end target parallel -+ -+ ! CHECK: omp.target -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target parallel if(parallel: .true.) -+ i = 1 -+ !$omp end target parallel -+ -+ ! ---------------------------------------------------------------------------- - ! TARGET SIMD - ! ---------------------------------------------------------------------------- - ! CHECK: omp.target -@@ -371,8 +693,10 @@ - - ! CHECK: omp.target - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.simd - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target simd if(.true.) - do i = 1, 10 - end do -@@ -380,8 +704,10 @@ - - ! CHECK: omp.target - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.simd - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target simd if(target: .true.) if(simd: .false.) - do i = 1, 10 - end do -@@ -389,6 +715,7 @@ - - ! CHECK: omp.target - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.simd - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { -@@ -402,14 +729,438 @@ - ! CHECK-SAME: { - ! CHECK: omp.simd - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target simd if(simd: .true.) - do i = 1, 10 - end do - !$omp end target simd - - ! ---------------------------------------------------------------------------- -- ! TARGET TEAMS -+ ! TARGET TEAMS DISTRIBUTE -+ ! ---------------------------------------------------------------------------- -+ ! CHECK: omp.target -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute -+ -+ ! CHECK: omp.target -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute if(.true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute -+ -+ ! CHECK: omp.target -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute if(target: .true.) if(teams: .false.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute -+ -+ ! CHECK: omp.target -+ ! 
CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute if(target: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute -+ -+ ! CHECK: omp.target -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute if(teams: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute -+ -+ ! ---------------------------------------------------------------------------- -+ ! TARGET TEAMS DISTRIBUTE PARALLEL DO -+ ! ---------------------------------------------------------------------------- -+ ! CHECK: omp.target -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute parallel do -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do -+ -+ ! CHECK: omp.target -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute parallel do if(.true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do -+ -+ ! CHECK: omp.target -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute parallel do if(target: .true.) if(teams: .false.) if(parallel: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do -+ -+ ! CHECK: omp.target -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute parallel do if(target: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do -+ -+ ! CHECK: omp.target -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute parallel do if(teams: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do -+ -+ ! CHECK: omp.target -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! 
CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute parallel do if(parallel: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do -+ -+ ! ---------------------------------------------------------------------------- -+ ! TARGET TEAMS DISTRIBUTE PARALLEL DO SIMD -+ ! ---------------------------------------------------------------------------- -+ ! CHECK: omp.target -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute parallel do simd -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do simd -+ -+ ! CHECK: omp.target -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute parallel do simd if(.true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do simd -+ -+ ! CHECK: omp.target -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute parallel do simd if(target: .true.) if(teams: .false.) if(parallel: .true.) if(simd: .false.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do simd -+ -+ ! CHECK: omp.target -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute parallel do simd if(target: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do simd -+ -+ ! CHECK: omp.target -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute parallel do simd if(teams: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do simd -+ -+ ! CHECK: omp.target -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute parallel do simd if(parallel: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do simd -+ -+ ! CHECK: omp.target -+ ! 
CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute parallel do simd if(simd: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do simd -+ -+ ! ---------------------------------------------------------------------------- -+ ! TARGET TEAMS DISTRIBUTE SIMD - ! ---------------------------------------------------------------------------- -+ ! CHECK: omp.target -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute simd -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute simd -+ -+ ! CHECK: omp.target -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute simd if(.true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute simd -+ -+ ! CHECK: omp.target -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute simd if(target: .true.) if(teams: .false.) if(simd: .false.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute simd -+ -+ ! CHECK: omp.target -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute simd if(target: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute simd -+ -+ ! CHECK: omp.target -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute simd if(teams: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute simd - - ! CHECK: omp.target - ! CHECK-NOT: if({{.*}}) -@@ -417,28 +1168,53 @@ - ! CHECK: omp.teams - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute simd if(simd: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute simd -+ -+ ! ---------------------------------------------------------------------------- -+ ! TARGET TEAMS -+ ! ---------------------------------------------------------------------------- -+ ! CHECK: omp.target -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target teams - i = 1 - !$omp end target teams - - ! CHECK: omp.target - ! CHECK-SAME: if({{.*}}) -+ ! 
CHECK-SAME: { - ! CHECK: omp.teams - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target teams if(.true.) - i = 1 - !$omp end target teams - - ! CHECK: omp.target - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.teams - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target teams if(target: .true.) if(teams: .false.) - i = 1 - !$omp end target teams - - ! CHECK: omp.target - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.teams - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { -@@ -451,11 +1227,28 @@ - ! CHECK-SAME: { - ! CHECK: omp.teams - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target teams if(teams: .true.) - i = 1 - !$omp end target teams - - ! ---------------------------------------------------------------------------- -+ ! TARGET UPDATE -+ ! ---------------------------------------------------------------------------- -+ -+ ! CHECK: omp.target_update -+ ! CHECK-NOT: if({{.*}}) -+ !$omp target update to(i) -+ -+ ! CHECK: omp.target_update -+ ! CHECK-SAME: if({{.*}}) -+ !$omp target update to(i) if(.true.) -+ -+ ! CHECK: omp.target_update -+ ! CHECK-SAME: if({{.*}}) -+ !$omp target update to(i) if(target update: .true.) -+ -+ ! ---------------------------------------------------------------------------- - ! TASK - ! ---------------------------------------------------------------------------- - ! CHECK: omp.task -@@ -466,15 +1259,336 @@ - - ! CHECK: omp.task - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp task if(.true.) - !$omp end task - - ! CHECK: omp.task - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp task if(task: .true.) - !$omp end task - - ! ---------------------------------------------------------------------------- -+ ! TEAMS DISTRIBUTE -+ ! ---------------------------------------------------------------------------- -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute -+ -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute if(.true.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute -+ -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute if(teams: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute -+ -+ ! ---------------------------------------------------------------------------- -+ ! TEAMS DISTRIBUTE PARALLEL DO -+ ! ---------------------------------------------------------------------------- -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute parallel do -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute parallel do -+ -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute parallel do if(.true.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute parallel do -+ -+ ! CHECK: omp.teams -+ ! 
CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute parallel do if(teams: .true.) if(parallel: .false.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute parallel do -+ -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute parallel do if(teams: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute parallel do -+ -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute parallel do if(parallel: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute parallel do -+ -+ ! ---------------------------------------------------------------------------- -+ ! TEAMS DISTRIBUTE PARALLEL DO SIMD -+ ! ---------------------------------------------------------------------------- -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute parallel do simd -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute parallel do simd -+ -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute parallel do simd if(.true.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute parallel do simd -+ -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute parallel do simd if(teams: .false.) if(parallel: .true.) if(simd: .false.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute parallel do simd -+ -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute parallel do simd if(teams: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute parallel do simd -+ -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! 
CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute parallel do simd if(parallel: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute parallel do simd -+ -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute parallel do simd if(simd: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute parallel do simd -+ -+ ! ---------------------------------------------------------------------------- -+ ! TEAMS DISTRIBUTE SIMD -+ ! ---------------------------------------------------------------------------- -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute simd -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute simd -+ -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute simd if(.true.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute simd -+ -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute simd if(teams: .true.) if(simd: .false.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute simd -+ -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute simd if(teams: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute simd -+ -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute simd if(simd: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute simd -+ -+ ! ---------------------------------------------------------------------------- - ! TEAMS - ! ---------------------------------------------------------------------------- - ! CHECK: omp.teams -@@ -486,12 +1600,14 @@ - - ! CHECK: omp.teams - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp teams if(.true.) - i = 1 - !$omp end teams - - ! CHECK: omp.teams - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp teams if(teams: .true.) - i = 1 - !$omp end teams -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/loop-combined.f90 llvm-project/flang/test/Lower/OpenMP/loop-combined.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/loop-combined.f90 2024-06-12 10:43:12.676209906 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/loop-combined.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -6,19 +6,51 @@ - program main - integer :: i - -- ! TODO When DISTRIBUTE, TASKLOOP and TEAMS are supported add: -- ! 
- DISTRIBUTE PARALLEL DO SIMD -- ! - DISTRIBUTE PARALLEL DO -- ! - DISTRIBUTE SIMD -- ! - TARGET TEAMS DISTRIBUTE PARALLEL DO SIMD -- ! - TARGET TEAMS DISTRIBUTE PARALLEL DO -- ! - TARGET TEAMS DISTRIBUTE SIMD -- ! - TARGET TEAMS DISTRIBUTE -- ! - TASKLOOP SIMD -- ! - TEAMS DISTRIBUTE PARALLEL DO SIMD -- ! - TEAMS DISTRIBUTE PARALLEL DO -- ! - TEAMS DISTRIBUTE SIMD -- ! - TEAMS DISTRIBUTE -+ ! TODO TASKLOOP SIMD -+ -+ ! ---------------------------------------------------------------------------- -+ ! DISTRIBUTE PARALLEL DO SIMD -+ ! ---------------------------------------------------------------------------- -+ !$omp teams -+ -+ ! CHECK: omp.distribute -+ ! CHECK: omp.parallel -+ ! CHECK: omp.wsloop -+ !$omp distribute parallel do simd -+ do i = 1, 10 -+ end do -+ !$omp end distribute parallel do simd -+ -+ !$omp end teams -+ -+ ! ---------------------------------------------------------------------------- -+ ! DISTRIBUTE PARALLEL DO -+ ! ---------------------------------------------------------------------------- -+ !$omp teams -+ -+ ! CHECK: omp.distribute -+ ! CHECK: omp.parallel -+ ! CHECK: omp.wsloop -+ !$omp distribute parallel do -+ do i = 1, 10 -+ end do -+ !$omp end distribute parallel do -+ -+ !$omp end teams -+ -+ ! ---------------------------------------------------------------------------- -+ ! DISTRIBUTE SIMD -+ ! ---------------------------------------------------------------------------- -+ !$omp teams -+ -+ ! CHECK: omp.distribute -+ ! CHECK: omp.simd -+ !$omp distribute simd -+ do i = 1, 10 -+ end do -+ !$omp end distribute simd -+ -+ !$omp end teams - - ! ---------------------------------------------------------------------------- - ! DO SIMD -@@ -72,6 +104,59 @@ - !$omp end target parallel do - - ! ---------------------------------------------------------------------------- -+ ! TARGET TEAMS DISTRIBUTE PARALLEL DO SIMD -+ ! ---------------------------------------------------------------------------- -+ -+ ! CHECK: omp.target -+ ! CHECK: omp.teams -+ ! CHECK: omp.distribute -+ ! CHECK: omp.parallel -+ ! CHECK: omp.wsloop -+ !$omp target teams distribute parallel do simd -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do simd -+ -+ ! ---------------------------------------------------------------------------- -+ ! TARGET TEAMS DISTRIBUTE PARALLEL DO -+ ! ---------------------------------------------------------------------------- -+ -+ ! CHECK: omp.target -+ ! CHECK: omp.teams -+ ! CHECK: omp.distribute -+ ! CHECK: omp.parallel -+ ! CHECK: omp.wsloop -+ !$omp target teams distribute parallel do -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do -+ -+ ! ---------------------------------------------------------------------------- -+ ! TARGET TEAMS DISTRIBUTE SIMD -+ ! ---------------------------------------------------------------------------- -+ -+ ! CHECK: omp.target -+ ! CHECK: omp.teams -+ ! CHECK: omp.distribute -+ ! CHECK: omp.simd -+ !$omp target teams distribute simd -+ do i = 1, 10 ++ ++ ! BOTH: omp.parallel ++ ! BOTH-SAME: num_threads({{.*}}) ++ ! BOTH: omp.distribute ++ ! BOTH-NEXT: omp.wsloop ++ !$omp distribute parallel do num_threads(1) ++ do i=1,10 ++ call foo() + end do -+ !$omp end target teams distribute simd ++ !$omp end distribute parallel do ++ !$omp end teams ++end subroutine distribute_parallel_do + -+ ! ---------------------------------------------------------------------------- -+ ! TARGET TEAMS DISTRIBUTE -+ ! ---------------------------------------------------------------------------- ++! 
BOTH-LABEL: func.func @_QPdistribute_parallel_do_simd ++subroutine distribute_parallel_do_simd() ++ ! BOTH: omp.target ++ ++ ! HOST-SAME: host_eval(%{{.*}} -> %[[LB:.*]], %{{.*}} -> %[[UB:.*]], %{{.*}} -> %[[STEP:.*]], %{{.*}} -> %[[NUM_THREADS:.*]] : i32, i32, i32, i32) ++ ++ ! DEVICE-NOT: host_eval({{.*}}) ++ ! DEVICE-SAME: { + -+ ! CHECK: omp.target -+ ! CHECK: omp.teams -+ ! CHECK: omp.distribute -+ !$omp target teams distribute -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute ++ ! BOTH: omp.teams ++ !$omp target teams + -+ ! ---------------------------------------------------------------------------- - ! TARGET SIMD - ! ---------------------------------------------------------------------------- - ! CHECK: omp.target -@@ -80,4 +165,54 @@ - do i = 1, 10 - end do - !$omp end target simd ++ ! BOTH: omp.parallel + -+ ! ---------------------------------------------------------------------------- -+ ! TEAMS DISTRIBUTE PARALLEL DO SIMD -+ ! ---------------------------------------------------------------------------- ++ ! HOST-SAME: num_threads(%[[NUM_THREADS]] : i32) ++ ! DEVICE-SAME: num_threads({{.*}}) + -+ ! CHECK: omp.teams -+ ! CHECK: omp.distribute -+ ! CHECK: omp.parallel -+ ! CHECK: omp.wsloop -+ !$omp teams distribute parallel do simd -+ do i = 1, 10 ++ ! BOTH: omp.distribute ++ ! BOTH-NEXT: omp.wsloop ++ ! BOTH-NEXT: omp.simd ++ ! BOTH-NEXT: omp.loop_nest ++ ++ ! HOST-SAME: (%{{.*}}) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) ++ !$omp distribute parallel do simd num_threads(1) ++ do i=1,10 ++ call foo() + end do -+ !$omp end teams distribute parallel do simd ++ !$omp end distribute parallel do simd ++ !$omp end target teams ++ ++ ! BOTH: omp.target ++ ! BOTH-NOT: host_eval({{.*}}) ++ ! BOTH-SAME: { ++ ! BOTH: omp.teams ++ !$omp target teams ++ call foo() !< Prevents this from being SPMD. ++ ++ ! BOTH: omp.parallel ++ ! BOTH-SAME: num_threads({{.*}}) ++ ! BOTH: omp.distribute ++ ! BOTH-NEXT: omp.wsloop ++ ! BOTH-NEXT: omp.simd ++ !$omp distribute parallel do simd num_threads(1) ++ do i=1,10 ++ call foo() ++ end do ++ !$omp end distribute parallel do simd ++ !$omp end target teams + -+ ! ---------------------------------------------------------------------------- -+ ! TEAMS DISTRIBUTE PARALLEL DO -+ ! ---------------------------------------------------------------------------- ++ ! BOTH: omp.teams ++ !$omp teams + -+ ! CHECK: omp.teams -+ ! CHECK: omp.distribute -+ ! CHECK: omp.parallel -+ ! CHECK: omp.wsloop -+ !$omp teams distribute parallel do -+ do i = 1, 10 ++ ! BOTH: omp.parallel ++ ! BOTH-SAME: num_threads({{.*}}) ++ ! BOTH: omp.distribute ++ ! BOTH-NEXT: omp.wsloop ++ ! BOTH-NEXT: omp.simd ++ !$omp distribute parallel do simd num_threads(1) ++ do i=1,10 ++ call foo() + end do -+ !$omp end teams distribute parallel do ++ !$omp end distribute parallel do simd ++ !$omp end teams ++end subroutine distribute_parallel_do_simd +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/FIR/mismatched-bound-types.f90 llvm-project-aso/flang/test/Lower/OpenMP/FIR/mismatched-bound-types.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/FIR/mismatched-bound-types.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Lower/OpenMP/FIR/mismatched-bound-types.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,35 @@ ++! RUN: %flang_fc1 -fopenmp -emit-fir %s -o - | FileCheck %s + -+ ! ---------------------------------------------------------------------------- -+ ! TEAMS DISTRIBUTE SIMD -+ ! 
---------------------------------------------------------------------------- ++! Check that this testcase is lowered to FIR successfully. + -+ ! CHECK: omp.teams -+ ! CHECK: omp.distribute -+ ! CHECK: omp.simd -+ !$omp teams distribute simd -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute simd ++! CHECK: %[[ONE:.*]] = arith.constant 1 : i32 ++! CHECK: %[[DECL_N:.*]] = fir.declare %{{.*}} {uniq_name = "_QMtestEn"} : (!fir.ref) -> !fir.ref ++! CHECK: %[[HOST_N:.*]] = fir.load %[[DECL_N]] : !fir.ref ++! CHECK: %[[HOST_LB:.*]] = fir.convert %[[ONE]] : (i32) -> i64 ++! CHECK: %[[HOST_STEP:.*]] = fir.convert %[[ONE]] : (i32) -> i64 ++! CHECK: omp.target ++! CHECK-SAME: host_eval(%[[HOST_LB]] -> %[[LB:[[:alnum:]]+]], %[[HOST_N]] -> %[[UB:[[:alnum:]]+]], %[[HOST_STEP]] -> %[[STEP:[[:alnum:]]+]] : i64, i64, i64) ++! CHECK: omp.teams ++! CHECK: omp.parallel ++! CHECK: omp.distribute ++! CHECK-NEXT: omp.wsloop ++! CHECK-NEXT: omp.loop_nest ({{.*}}) : i64 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + -+ ! ---------------------------------------------------------------------------- -+ ! TEAMS DISTRIBUTE ++module Test ++ use, intrinsic :: ISO_Fortran_env, only: REAL64,INT64 ++ implicit none ++ integer(kind=INT64) :: N ++ real(kind=REAL64), allocatable :: A(:) ++ ++ contains ++ subroutine init_arrays(initA) ++ implicit none ++ real(kind=REAL64), intent(in) :: initA ++ integer(kind=INT64) :: i ++ !$omp target teams distribute parallel do ++ do i = 1, N ++ A(i) = initA ++ end do ++ end subroutine init_arrays ++ ++end module Test +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/function-filtering-2.f90 llvm-project-aso/flang/test/Lower/OpenMP/function-filtering-2.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/function-filtering-2.f90 2024-10-18 14:35:01.031241608 -0500 ++++ llvm-project-aso/flang/test/Lower/OpenMP/function-filtering-2.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -1,9 +1,9 @@ + ! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM,LLVM-HOST %s + ! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -emit-hlfir %s -o - | FileCheck --check-prefix=MLIR %s +-! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM,LLVM-DEVICE %s +-! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefix=MLIR %s ++! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM,LLVM-DEVICE %s ++! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefix=MLIR %s + ! RUN: bbc -fopenmp -fopenmp-version=52 -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-HOST,MLIR-ALL %s +-! RUN: bbc -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s ++! RUN: bbc -target amdgcn-amd-amdhsa -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s + + ! MLIR: func.func @{{.*}}implicit_invocation() attributes {omp.declare_target = #omp.declaretarget} + ! 
MLIR: return +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/function-filtering-3.f90 llvm-project-aso/flang/test/Lower/OpenMP/function-filtering-3.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/function-filtering-3.f90 2024-08-27 20:36:25.292172480 -0500 ++++ llvm-project-aso/flang/test/Lower/OpenMP/function-filtering-3.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -1,9 +1,9 @@ + ! RUN: %flang_fc1 -fopenmp -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-HOST,LLVM-ALL %s + ! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-HOST,MLIR-ALL %s +-! RUN: %flang_fc1 -fopenmp -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-DEVICE,LLVM-ALL %s +-! RUN: %flang_fc1 -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s ++! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-DEVICE,LLVM-ALL %s ++! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s + ! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-HOST,MLIR-ALL %s +-! RUN: bbc -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s ++! RUN: bbc -fopenmp -target amdgcn-amd-amdhsa -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s + + ! Check that the correct LLVM IR functions are kept for the host and device + ! after running the whole set of translation and transformation passes from +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/function-filtering.f90 llvm-project-aso/flang/test/Lower/OpenMP/function-filtering.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/function-filtering.f90 2024-10-18 14:35:01.031241608 -0500 ++++ llvm-project-aso/flang/test/Lower/OpenMP/function-filtering.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -1,9 +1,9 @@ + ! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-HOST,LLVM-ALL %s + ! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-HOST,MLIR-ALL %s +-! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-DEVICE,LLVM-ALL %s +-! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s ++! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-DEVICE,LLVM-ALL %s ++! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s + ! RUN: bbc -fopenmp -fopenmp-version=52 -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-HOST,MLIR-ALL %s +-! RUN: bbc -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s ++! 
RUN: bbc -target amdgcn-amd-amdhsa -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s + + ! Check that the correct LLVM IR functions are kept for the host and device + ! after running the whole set of translation and transformation passes from +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/hlfir-to-fir-conv-omp.mlir llvm-project-aso/flang/test/Lower/OpenMP/hlfir-to-fir-conv-omp.mlir +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/hlfir-to-fir-conv-omp.mlir 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Lower/OpenMP/hlfir-to-fir-conv-omp.mlir 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,64 @@ ++// Tests HLFIR-to-FIR conversion aspects relevant to OpenMP. For example, that ++// the correct alloca block is chosen for OMP regions. ++ ++// RUN: fir-opt --convert-hlfir-to-fir %s -o - | \ ++// RUN: FileCheck %s ++ ++fir.global internal @_QQro.1xi4.0(dense<42> : tensor<1xi32>) constant : !fir.array<1xi32> ++ ++func.func @_QPfoo() { ++ %c1 = arith.constant 1 : index ++ %host_alloc = fir.alloca !fir.array<1xi32> {bindc_name = "arr", uniq_name = "_QFfooEarr"} ++ ++ %1 = fir.shape %c1 : (index) -> !fir.shape<1> ++ %host_decl:2 = hlfir.declare %host_alloc(%1) {uniq_name = "_QFfooEarr"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) ++ %map_info = omp.map.info var_ptr(%host_decl#1 : !fir.ref>, !fir.array<1xi32>) map_clauses(implicit, tofrom) capture(ByRef) -> !fir.ref> {name = "arr"} ++ ++ // CHECK: omp.target ++ omp.target map_entries(%map_info -> %arg1 : !fir.ref>) { ++ %c1_2 = arith.constant 1 : index ++ %21 = fir.shape %c1_2 : (index) -> !fir.shape<1> ++ ++ // CHECK: %[[TARGET_DECL:.*]] = fir.declare ++ %target_decl:2 = hlfir.declare %arg1(%21) {uniq_name = "_QFfooEarr"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) ++ ++ // CHECK: omp.teams ++ omp.teams { ++ %c1_3 = arith.constant 1 : i32 ++ %c10 = arith.constant 10 : i32 ++ ++ // CHECK: omp.parallel ++ omp.parallel { ++ // CHECK: %[[TO_BOX_ALLOC:.*]] = fir.alloca !fir.box> {pinned} ++ // CHECK: omp.distribute ++ omp.distribute { ++ // CHECK: omp.wsloop ++ omp.wsloop { ++ // CHECK: omp.loop_nest ++ omp.loop_nest (%arg2) : i32 = (%c1_3) to (%c10) inclusive step (%c1_3) { ++ %25 = fir.address_of(@_QQro.1xi4.0) : !fir.ref> ++ %26 = fir.shape %c1_2 : (index) -> !fir.shape<1> ++ %27:2 = hlfir.declare %25(%26) {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro.1xi4.0"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) ++ ++ ++ // CHECK: %[[EMBOX:.*]] = fir.embox %[[TARGET_DECL]] ++ // CHECK: fir.store %[[EMBOX]] to %[[TO_BOX_ALLOC]] ++ // CHECK: %[[BOX_ALLOC_CONV:.*]] = fir.convert %[[TO_BOX_ALLOC]] : (!fir.ref>>) -> !fir.ref> ++ // CHECK: fir.call @_FortranAAssign(%[[BOX_ALLOC_CONV]], {{.*}}) ++ hlfir.assign %27#0 to %target_decl#0 : !fir.ref>, !fir.ref> ++ // CHECK: omp.yield ++ omp.yield ++ } ++ } {omp.composite} ++ } {omp.composite} ++ // CHECK: omp.terminator ++ omp.terminator ++ } {omp.composite} ++ // CHECK: omp.terminator ++ omp.terminator ++ } ++ // CHECK: omp.terminator ++ omp.terminator ++ } ++ return ++} +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/if-clause.f90 llvm-project-aso/flang/test/Lower/OpenMP/if-clause.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/if-clause.f90 2024-09-13 09:46:38.870303386 -0500 ++++ llvm-project-aso/flang/test/Lower/OpenMP/if-clause.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -11,7 +11,6 @@ + ! 
TODO When they are supported, add tests for: + ! - PARALLEL SECTIONS + ! - PARALLEL WORKSHARE +- ! - TARGET UPDATE + ! - TASKLOOP + ! - TASKLOOP SIMD + +@@ -1225,6 +1224,22 @@ + !$omp end target teams + + ! ---------------------------------------------------------------------------- ++ ! TARGET UPDATE + ! ---------------------------------------------------------------------------- + -+ ! CHECK: omp.teams -+ ! CHECK: omp.distribute -+ !$omp teams distribute -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute ++ ! CHECK: omp.target_update ++ ! CHECK-NOT: if({{.*}}) ++ !$omp target update to(i) + - end program main -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/loop-lifetime.f90 llvm-project/flang/test/Lower/OpenMP/loop-lifetime.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/loop-lifetime.f90 1969-12-31 18:00:00.000000000 -0600 -+++ llvm-project/flang/test/Lower/OpenMP/loop-lifetime.f90 2024-06-12 10:44:09.351614239 -0500 ++ ! CHECK: omp.target_update ++ ! CHECK-SAME: if({{.*}}) ++ !$omp target update to(i) if(.true.) ++ ++ ! CHECK: omp.target_update ++ ! CHECK-SAME: if({{.*}}) ++ !$omp target update to(i) if(target update: .true.) ++ ++ ! ---------------------------------------------------------------------------- + ! TASK + ! ---------------------------------------------------------------------------- + ! CHECK: omp.task +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/loop-lifetime.f90 llvm-project-aso/flang/test/Lower/OpenMP/loop-lifetime.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/loop-lifetime.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Lower/OpenMP/loop-lifetime.f90 2024-11-23 20:39:47.192175322 -0600 @@ -0,0 +1,91 @@ +! This test checks the insertion of lifetime information for loop indices of +! OpenMP loop operations. @@ -9225,12 +9128,12 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/loop-lif + +! CHECK-LABEL: define void @wsloop_i32 +subroutine wsloop_i32() ++ ! CHECK: %[[I_PRIV:.*]] = alloca i32 ++ ! CHECK: %[[I:.*]] = alloca i32 + ! CHECK: %[[LASTITER:.*]] = alloca i32 + ! CHECK: %[[LB:.*]] = alloca i32 + ! CHECK: %[[UB:.*]] = alloca i32 + ! CHECK: %[[STRIDE:.*]] = alloca i32 -+ ! CHECK: %[[I:.*]] = alloca i32 -+ ! CHECK: %[[I_PRIV:.*]] = alloca i32 + integer :: i + + ! CHECK: call void @llvm.lifetime.start.p0(i64 4, ptr %[[I_PRIV]]) @@ -9249,12 +9152,12 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/loop-lif + +! CHECK-LABEL: define void @wsloop_i64 +subroutine wsloop_i64() ++ ! CHECK-DAG: %[[I_PRIV:.*]] = alloca i64 ++ ! CHECK-DAG: %[[I:.*]] = alloca i64 + ! CHECK-DAG: %[[LASTITER:.*]] = alloca i32 + ! CHECK-DAG: %[[LB:.*]] = alloca i64 + ! CHECK-DAG: %[[UB:.*]] = alloca i64 + ! CHECK-DAG: %[[STRIDE:.*]] = alloca i64 -+ ! CHECK-DAG: %[[I:.*]] = alloca i64 -+ ! CHECK-DAG: %[[I_PRIV:.*]] = alloca i64 + integer*8 :: i + + ! CHECK: call void @llvm.lifetime.start.p0(i64 8, ptr %[[I_PRIV]]) @@ -9273,8 +9176,8 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/loop-lif + +! CHECK-LABEL: define void @simdloop_i32 +subroutine simdloop_i32() -+ ! CHECK: %[[I:.*]] = alloca i32 + ! CHECK: %[[I_PRIV:.*]] = alloca i32 ++ ! CHECK: %[[I:.*]] = alloca i32 + integer :: i + + ! CHECK: call void @llvm.lifetime.start.p0(i64 4, ptr %[[I_PRIV]]) @@ -9293,8 +9196,8 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/loop-lif + +! CHECK-LABEL: define void @simdloop_i64 +subroutine simdloop_i64() -+ ! 
CHECK: %[[I:.*]] = alloca i64 + ! CHECK: %[[I_PRIV:.*]] = alloca i64 ++ ! CHECK: %[[I:.*]] = alloca i64 + integer*8 :: i + + ! CHECK: call void @llvm.lifetime.start.p0(i64 8, ptr %[[I_PRIV]]) @@ -9310,73 +9213,45 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/loop-lif + end do + !$omp end simd +end subroutine -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/omp-do-simd-safelen.f90 llvm-project/flang/test/Lower/OpenMP/omp-do-simd-safelen.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/omp-do-simd-safelen.f90 1969-12-31 18:00:00.000000000 -0600 -+++ llvm-project/flang/test/Lower/OpenMP/omp-do-simd-safelen.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -0,0 +1,16 @@ -+! This test checks lowering of OpenMP do simd safelen() pragma -+ -+! RUN: bbc -emit-hlfir -fopenmp -o - %s | FileCheck %s -+! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s | FileCheck %s -+subroutine testDoSimdSafelen(int_array) -+ integer :: int_array(*) -+ -+ !CHECK: omp.wsloop { -+ !CHECK: omp.simd safelen(4) { -+ !CHECK: omp.loop_nest {{.*}} { -+ !$omp do simd safelen(4) -+ do index_ = 1, 10 -+ end do -+ !$omp end do simd -+ -+end subroutine testDoSimdSafelen -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/omp-do-simd-simdlen.f90 llvm-project/flang/test/Lower/OpenMP/omp-do-simd-simdlen.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/omp-do-simd-simdlen.f90 1969-12-31 18:00:00.000000000 -0600 -+++ llvm-project/flang/test/Lower/OpenMP/omp-do-simd-simdlen.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -0,0 +1,16 @@ -+! This test checks lowering of OpenMP do simd simdlen() pragma -+ -+! RUN: bbc -emit-hlfir -fopenmp -o - %s | FileCheck %s -+! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s | FileCheck %s -+subroutine testDoSimdSimdlen(int_array) -+ integer :: int_array(*) -+ -+ !CHECK: omp.wsloop { -+ !CHECK: omp.simd simdlen(4) { -+ !CHECK: omp.loop_nest {{.*}} { -+ !$omp do simd simdlen(4) -+ do index_ = 1, 10 -+ end do -+ !$omp end do simd -+ -+end subroutine testDoSimdSimdlen -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/parallel-reduction3.f90 llvm-project/flang/test/Lower/OpenMP/parallel-reduction3.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/parallel-reduction3.f90 2024-06-12 10:43:12.676209906 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/parallel-reduction3.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -69,13 +69,13 @@ - ! CHECK: omp.parallel { - ! CHECK: %[[VAL_14:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}} - ! CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_14]] {uniq_name = "_QFsEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) --! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 --! CHECK: %[[VAL_17:.*]] = arith.constant 100 : i32 --! CHECK: %[[VAL_18:.*]] = arith.constant 1 : i32 --! CHECK: %[[VAL_19:.*]] = fir.alloca !fir.box> --! CHECK: fir.store %[[VAL_12]]#0 to %[[VAL_19]] : !fir.ref>> --! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_Uxi32 %[[VAL_19]] -> %[[VAL_20:.*]] : !fir.ref>>) { --! CHECK-NEXT: omp.loop_nest (%[[VAL_21:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) { -+! CHECK: %[[VAL_16:.*]] = fir.alloca !fir.box> -+! CHECK: fir.store %[[VAL_12]]#0 to %[[VAL_16]] : !fir.ref>> -+! CHECK: %[[VAL_17:.*]] = arith.constant 1 : i32 -+! CHECK: %[[VAL_18:.*]] = arith.constant 100 : i32 -+! CHECK: %[[VAL_19:.*]] = arith.constant 1 : i32 -+! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_Uxi32 %[[VAL_16]] -> %[[VAL_20:.*]] : !fir.ref>>) { -+! 
CHECK-NEXT: omp.loop_nest (%[[VAL_21:.*]]) : i32 = (%[[VAL_17]]) to (%[[VAL_18]]) inclusive step (%[[VAL_19]]) { - ! CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFsEc"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) - ! CHECK: fir.store %[[VAL_21]] to %[[VAL_15]]#1 : !fir.ref - ! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref>> -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/reduction_var_map.f90 llvm-project/flang/test/Lower/OpenMP/reduction_var_map.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/reduction_var_map.f90 1969-12-31 18:00:00.000000000 -0600 -+++ llvm-project/flang/test/Lower/OpenMP/reduction_var_map.f90 2024-06-12 10:44:09.351614239 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/reduction-target-spmd.f90 llvm-project-aso/flang/test/Lower/OpenMP/reduction-target-spmd.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/reduction-target-spmd.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Lower/OpenMP/reduction-target-spmd.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,15 @@ ++! RUN: %flang_fc1 -emit-fir -fopenmp -o - %s | FileCheck %s ++! RUN: bbc -emit-fir -fopenmp -o - %s | FileCheck %s ++ ++! CHECK: omp.teams ++! CHECK-SAME: reduction(@add_reduction_i32 %{{.*}} -> %{{.*}} : !fir.ref) ++subroutine myfun() ++ integer :: i, j ++ i = 0 ++ j = 0 ++ !$omp target teams distribute parallel do reduction(+:i) ++ do j = 1,5 ++ i = i + j ++ end do ++ !$omp end target teams distribute parallel do ++end subroutine myfun +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/reduction-teams.f90 llvm-project-aso/flang/test/Lower/OpenMP/reduction-teams.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/reduction-teams.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Lower/OpenMP/reduction-teams.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,13 @@ ++! RUN: bbc -emit-fir -fopenmp -o - %s | FileCheck %s ++! RUN: %flang_fc1 -emit-fir -fopenmp -o - %s | FileCheck %s ++ ++! CHECK: omp.teams ++! CHECK-SAME: reduction ++subroutine reduction_teams() ++ integer :: i ++ i = 0 ++ ++ !$omp teams reduction(+:i) ++ i = i + 1 ++ !$omp end teams ++end subroutine reduction_teams +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/reduction_var_map.f90 llvm-project-aso/flang/test/Lower/OpenMP/reduction_var_map.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/reduction_var_map.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Lower/OpenMP/reduction_var_map.f90 2024-11-23 20:39:47.192175322 -0600 @@ -0,0 +1,43 @@ +!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s + @@ -9421,24 +9296,41 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/reductio +end subroutine omp_target_team_separate +!CHECK-LABEL: func.func @_QPomp_target_team_separate() { +!CHECK: omp.map.info var_ptr({{.*}} : !fir.ref, i64) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref {name = "s3"} -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/simd.f90 llvm-project/flang/test/Lower/OpenMP/simd.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/simd.f90 2024-06-12 10:43:12.676209906 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/simd.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -27,10 +27,10 @@ - ! 
CHECK: %[[ARG_N:.*]]:2 = hlfir.declare %{{.*}} dummy_scope %{{[0-9]+}} {uniq_name = "_QFsimd_with_if_clauseEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) - integer :: i, n, threshold - !$OMP SIMD IF( n .GE. threshold ) -+ ! CHECK: %[[COND:.*]] = arith.cmpi sge - ! CHECK: %[[LB:.*]] = arith.constant 1 : i32 - ! CHECK: %[[UB:.*]] = fir.load %[[ARG_N]]#0 - ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 -- ! CHECK: %[[COND:.*]] = arith.cmpi sge - ! CHECK: omp.simd if(%[[COND:.*]]) { - ! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { - do i = 1, n -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/target.f90 llvm-project/flang/test/Lower/OpenMP/target.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/target.f90 2024-06-12 10:43:12.676209906 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/target.f90 2024-06-12 10:44:09.351614239 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/rtl-flags.f90 llvm-project-aso/flang/test/Lower/OpenMP/rtl-flags.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/rtl-flags.f90 2024-08-27 20:36:25.296172440 -0500 ++++ llvm-project-aso/flang/test/Lower/OpenMP/rtl-flags.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -20,7 +20,7 @@ + !RUN: bbc -emit-hlfir -fopenmp -fopenmp-assume-no-nested-parallelism -fopenmp-is-target-device -o - %s | FileCheck %s --check-prefix=NEST-PAR-DEVICE-FIR + !RUN: bbc -emit-hlfir -fopenmp -fopenmp-target-debug=1 -fopenmp-assume-teams-oversubscription -fopenmp-assume-no-nested-parallelism -fopenmp-assume-threads-oversubscription -fopenmp-assume-no-thread-state -fopenmp-is-target-device -o - %s | FileCheck %s --check-prefix=ALL-DEVICE-FIR + +-!DEFAULT-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags ++!DEFAULT-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags + !DEFAULT-DEVICE-FIR-SAME: omp.is_target_device = true + !DEFAULT-DEVICE-FIR-VERSION: module attributes {{{.*}}omp.flags = #omp.flags + !DEFAULT-DEVICE-FIR-VERSION-SAME: omp.is_target_device = true +@@ -28,12 +28,12 @@ + !DEFAULT-HOST-FIR: module attributes {{{.*}}omp.is_target_device = false{{.*}} + !DEFAULT-HOST-FIR-VERSION: module attributes {{{.*}}omp.is_target_device = false + !DEFAULT-HOST-FIR-VERSION-SAME: omp.version = #omp.version +-!DBG-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags +-!DBG-EQ-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags +-!TEAMS-OSUB-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags +-!THREAD-OSUB-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags +-!THREAD-STATE-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags +-!NEST-PAR-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags +-!ALL-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags ++!DBG-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags ++!DBG-EQ-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags ++!TEAMS-OSUB-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags ++!THREAD-OSUB-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags ++!THREAD-STATE-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags ++!NEST-PAR-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags ++!ALL-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags + subroutine omp_subroutine() + end subroutine omp_subroutine +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/target.f90 llvm-project-aso/flang/test/Lower/OpenMP/target.f90 +--- 
llvm-project-aso-orig/flang/test/Lower/OpenMP/target.f90 2024-11-23 20:25:26.851275134 -0600 ++++ llvm-project-aso/flang/test/Lower/OpenMP/target.f90 2024-11-23 20:39:47.192175322 -0600 @@ -45,16 +45,16 @@ integer :: b(1024) integer :: c(1024) @@ -9497,31 +9389,10 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/target.f end subroutine omp_target_exit_mt !=============================================================================== -@@ -320,13 +320,13 @@ - !CHECK: omp.terminator - !$omp end target data - !CHECK: } -- !CHECK: %[[BOUNDS_B:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) -- !CHECK: %[[MAP_B:.*]] = omp.map.info var_ptr(%[[VAR_B_DECL]]#0 : !fir.ref>, !fir.array<1024xi32>) map_clauses(always, from) capture(ByRef) bounds(%[[BOUNDS_B]]) -> !fir.ref> {name = "b"} -- !CHECK: omp.target_data map_entries(%[[MAP_B]] : !fir.ref>) { -- !$omp target data map(always, from : b) -- !CHECK: omp.terminator -- !$omp end target data -- !CHECK: } -+ !CHECK %[[BOUNDS_B:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) -+ !CHECK %[[MAP_B:.*]] = omp.map.info var_ptr(%[[VAR_B_DECL]]#0 : !fir.ref>, !fir.array<1024xi32>) map_clauses(always, from) capture(ByRef) bounds(%[[BOUNDS_B]]) -> !fir.ref> {name = "b"} -+ !CHECK omp.target_data map_entries(%[[MAP_B]] : !fir.ref>) { -+ !!$omp target data map(always, from : b) -+ !CHECK omp.terminator -+ !!$omp end target data -+ !CHECK } - end subroutine omp_target_data_mt - - !=============================================================================== -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/target_private.f90 llvm-project/flang/test/Lower/OpenMP/target_private.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/target_private.f90 1969-12-31 18:00:00.000000000 -0600 -+++ llvm-project/flang/test/Lower/OpenMP/target_private.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -0,0 +1,76 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/target_private.f90 llvm-project-aso/flang/test/Lower/OpenMP/target_private.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/target_private.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Lower/OpenMP/target_private.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,73 @@ +!Test data-sharing attribute clauses for the `target` directive. 
+ +!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s @@ -9531,319 +9402,376 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/target_p + implicit none + integer :: x(1) + -+!$omp target private(x) -+ x(1) = 42 -+!$omp end target -+!CHECK: omp.target { -+!CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index -+!CHECK-DAG: %[[PRIV_ALLOC:.*]] = fir.alloca !fir.array<1xi32> {bindc_name = "x", -+!CHECK-SAME: pinned, uniq_name = "_QFomp_target_privateEx"} -+!CHECK-NEXT: %[[SHAPE:.*]] = fir.shape %[[C1]] : (index) -> !fir.shape<1> -+!CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]](%[[SHAPE]]) -+!CHECK-SAME: {uniq_name = "_QFomp_target_privateEx"} : -+!CHECK-SAME: (!fir.ref>, !fir.shape<1>) -> -+!CHECK-SAME: (!fir.ref>, !fir.ref>) -+!CHECK-DAG: %[[C42:.*]] = arith.constant 42 : i32 -+!CHECK-DAG: %[[C1_2:.*]] = arith.constant 1 : index -+!CHECK-NEXT: %[[PRIV_BINDING:.*]] = hlfir.designate %[[PRIV_DECL]]#0 (%[[C1_2]]) -+!CHECK-SAME: : (!fir.ref>, index) -> !fir.ref -+!CHECK-NEXT: hlfir.assign %[[C42]] to %[[PRIV_BINDING]] : i32, !fir.ref -+!CHECK-NEXT: omp.terminator -+!CHECK-NEXT: } ++!$omp target private(x) ++ x(1) = 42 ++!$omp end target ++!CHECK: omp.target { ++!CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index ++!CHECK-DAG: %[[PRIV_ALLOC:.*]] = fir.alloca !fir.array<1xi32> {bindc_name = "x", ++!CHECK-SAME: pinned, uniq_name = "_QFomp_target_privateEx"} ++!CHECK-NEXT: %[[SHAPE:.*]] = fir.shape %[[C1]] : (index) -> !fir.shape<1> ++!CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]](%[[SHAPE]]) ++!CHECK-SAME: {uniq_name = "_QFomp_target_privateEx"} : ++!CHECK-SAME: (!fir.ref>, !fir.shape<1>) -> ++!CHECK-SAME: (!fir.ref>, !fir.ref>) ++!CHECK-DAG: %[[C42:.*]] = arith.constant 42 : i32 ++!CHECK-DAG: %[[C1_2:.*]] = arith.constant 1 : index ++!CHECK-NEXT: %[[PRIV_BINDING:.*]] = hlfir.designate %[[PRIV_DECL]]#0 (%[[C1_2]]) ++!CHECK-SAME: : (!fir.ref>, index) -> !fir.ref ++!CHECK-NEXT: hlfir.assign %[[C42]] to %[[PRIV_BINDING]] : i32, !fir.ref ++!CHECK-NEXT: omp.terminator ++!CHECK-NEXT: } ++ ++end subroutine omp_target_private ++ ++!CHECK-LABEL: func.func @_QPomp_target_target_do_simd() ++subroutine omp_target_target_do_simd() ++ implicit none ++ ++ real(8) :: var ++ integer(8) :: iv ++ ++!$omp target teams distribute parallel do simd private(iv,var) ++ do iv=0,10 ++ var = 3.14 ++ end do ++!$omp end target teams distribute parallel do simd ++ ++!CHECK: %[[IV:.*]] = omp.map.info{{.*}}map_clauses(implicit{{.*}}{name = "iv"} ++!CHECK: %[[VAR:.*]] = omp.map.info{{.*}}map_clauses(implicit{{.*}}{name = "var"} ++!CHECK: omp.target ++!CHECK-SAME: map_entries(%[[IV]] -> %[[MAP_IV:.*]], %[[VAR]] -> %[[MAP_VAR:.*]] : !fir.ref, !fir.ref) ++!CHECK: %[[MAP_IV_DECL:.*]]:2 = hlfir.declare %[[MAP_IV]] ++!CHECK: %[[MAP_VAR_DECL:.*]]:2 = hlfir.declare %[[MAP_VAR]] ++!CHECK: omp.teams { ++!CHECK: omp.parallel private(@{{.*}} %[[MAP_IV_DECL]]#0 -> %[[IV_PRIV:.*]], @{{.*}} %[[MAP_VAR_DECL]]#0 -> %[[VAR_PRIV:.*]] : !fir.ref, !fir.ref) { ++!CHECK: %[[IV_DECL:.*]]:2 = hlfir.declare %[[IV_PRIV]] ++!CHECK: %[[VAR_DECL:.*]]:2 = hlfir.declare %[[VAR_PRIV]] ++!CHECK: omp.distribute { ++!CHECK-NEXT: omp.wsloop { ++!CHECK-NEXT: omp.simd { ++!CHECK-NEXT: omp.loop_nest ++!CHECK: fir.store {{.*}} to %[[IV_DECL]]#1 ++!CHECK: hlfir.assign {{.*}} to %[[VAR_DECL]]#0 ++!CHECK: omp.yield ++!CHECK-NEXT: } ++!CHECK-NEXT: } {omp.composite} ++!CHECK-NEXT: } {omp.composite} ++!CHECK-NEXT: } {omp.composite} ++!CHECK-NEXT: omp.terminator ++!CHECK-NEXT: } ++!CHECK-NEXT: omp.terminator 
++!CHECK-NEXT: } ++!CHECK-NEXT: omp.terminator ++!CHECK-NEXT: } ++ ++end subroutine omp_target_target_do_simd +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/target-spmd.f90 llvm-project-aso/flang/test/Lower/OpenMP/target-spmd.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/target-spmd.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Lower/OpenMP/target-spmd.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,191 @@ ++! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s ++ ++! CHECK-LABEL: func.func @_QPdistribute_parallel_do_generic() { ++subroutine distribute_parallel_do_generic() ++ ! CHECK: omp.target ++ ! CHECK-NOT: host_eval({{.*}}) ++ ! CHECK-SAME: { ++ !$omp target ++ !$omp teams ++ !$omp distribute parallel do ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end distribute parallel do ++ call bar() ++ !$omp end teams ++ !$omp end target ++ ++ ! CHECK: omp.target ++ ! CHECK-NOT: host_eval({{.*}}) ++ ! CHECK-SAME: { ++ !$omp target teams ++ !$omp distribute parallel do ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end distribute parallel do ++ call bar() ++ !$omp end target teams ++ ++ ! CHECK: omp.target ++ ! CHECK-NOT: host_eval({{.*}}) ++ ! CHECK-SAME: { ++ !$omp target teams ++ !$omp distribute parallel do ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end distribute parallel do ++ ++ !$omp distribute parallel do ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end distribute parallel do ++ !$omp end target teams ++end subroutine distribute_parallel_do_generic ++ ++! CHECK-LABEL: func.func @_QPdistribute_parallel_do_spmd() { ++subroutine distribute_parallel_do_spmd() ++ ! CHECK: omp.target ++ ! CHECK-SAME: host_eval({{.*}}) ++ !$omp target ++ !$omp teams ++ !$omp distribute parallel do ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end distribute parallel do ++ !$omp end teams ++ !$omp end target ++ ++ ! CHECK: omp.target ++ ! CHECK-SAME: host_eval({{.*}}) ++ !$omp target teams ++ !$omp distribute parallel do ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end distribute parallel do ++ !$omp end target teams ++end subroutine distribute_parallel_do_spmd ++ ++! CHECK-LABEL: func.func @_QPdistribute_parallel_do_simd_generic() { ++subroutine distribute_parallel_do_simd_generic() ++ ! CHECK: omp.target ++ ! CHECK-NOT: host_eval({{.*}}) ++ ! CHECK-SAME: { ++ !$omp target ++ !$omp teams ++ !$omp distribute parallel do simd ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end distribute parallel do simd ++ call bar() ++ !$omp end teams ++ !$omp end target ++ ++ ! CHECK: omp.target ++ ! CHECK-NOT: host_eval({{.*}}) ++ ! CHECK-SAME: { ++ !$omp target teams ++ !$omp distribute parallel do simd ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end distribute parallel do simd ++ call bar() ++ !$omp end target teams ++ ++ ! CHECK: omp.target ++ ! CHECK-NOT: host_eval({{.*}}) ++ ! CHECK-SAME: { ++ !$omp target teams ++ !$omp distribute parallel do simd ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end distribute parallel do simd ++ ++ !$omp distribute parallel do simd ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end distribute parallel do simd ++ !$omp end target teams ++end subroutine distribute_parallel_do_simd_generic + -+end subroutine omp_target_private ++! CHECK-LABEL: func.func @_QPdistribute_parallel_do_simd_spmd() { ++subroutine distribute_parallel_do_simd_spmd() ++ ! CHECK: omp.target ++ ! 
CHECK-SAME: host_eval({{.*}}) ++ !$omp target ++ !$omp teams ++ !$omp distribute parallel do simd ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end distribute parallel do simd ++ !$omp end teams ++ !$omp end target + -+!CHECK-LABEL: func.func @_QPomp_target_target_do_simd() -+subroutine omp_target_target_do_simd() -+ implicit none ++ ! CHECK: omp.target ++ ! CHECK-SAME: host_eval({{.*}}) ++ !$omp target teams ++ !$omp distribute parallel do simd ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end distribute parallel do simd ++ !$omp end target teams ++end subroutine distribute_parallel_do_simd_spmd + -+ real(8) :: var -+ integer(8) :: iv ++! CHECK-LABEL: func.func @_QPteams_distribute_parallel_do_spmd() { ++subroutine teams_distribute_parallel_do_spmd() ++ ! CHECK: omp.target ++ ! CHECK-SAME: host_eval({{.*}}) ++ !$omp target ++ !$omp teams distribute parallel do ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end teams distribute parallel do ++ !$omp end target ++end subroutine teams_distribute_parallel_do_spmd + -+!$omp target teams distribute parallel do simd private(iv,var) -+ do iv=0,10 -+ var = 3.14 -+ end do -+!$omp end target teams distribute parallel do simd ++! CHECK-LABEL: func.func @_QPteams_distribute_parallel_do_simd_spmd() { ++subroutine teams_distribute_parallel_do_simd_spmd() ++ ! CHECK: omp.target ++ ! CHECK-SAME: host_eval({{.*}}) ++ !$omp target ++ !$omp teams distribute parallel do simd ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end teams distribute parallel do simd ++ !$omp end target ++end subroutine teams_distribute_parallel_do_simd_spmd + -+!CHECK: %[[IV:.*]] = omp.map.info{{.*}}map_clauses(implicit{{.*}}{name = "iv"} -+!CHECK: %[[VAR:.*]] = omp.map.info{{.*}}map_clauses(implicit{{.*}}{name = "var"} -+!CHECK: omp.target -+!CHECK-SAME: map_entries(%[[IV]] -> %{{.*}}, %[[VAR]] -> %{{.*}} -+!CHECK: omp.teams { -+!CHECK: %[[IV_PRIV:.*]] = fir.alloca i64 {bindc_name = "iv" -+!CHECK: %[[IV_DECL:.*]]:2 = hlfir.declare %[[IV_PRIV]] -+!CHECK: %[[VAR_PRIV:.*]] = fir.alloca f64 {bindc_name = "var" -+!CHECK: %[[VAR_DECL:.*]]:2 = hlfir.declare %[[VAR_PRIV]] -+!CHECK: omp.distribute { -+!CHECK-NEXT: omp.parallel { -+!CHECK-NEXT: omp.wsloop { -+!CHECK-NEXT: omp.simd { -+!CHECK-NEXT: omp.loop_nest -+!CHECK: fir.store {{.*}} to %[[IV_DECL]]#1 -+!CHECK: hlfir.assign {{.*}} to %[[VAR_DECL]]#0 -+!CHECK: omp.yield -+!CHECK-NEXT: } -+!CHECK-NEXT: omp.terminator -+!CHECK-NEXT: } -+!CHECK-NEXT: omp.terminator -+!CHECK-NEXT: } -+!CHECK-NEXT: omp.terminator -+!CHECK-NEXT: } -+!CHECK-NEXT: omp.terminator -+!CHECK-NEXT: } -+!CHECK-NEXT: omp.terminator -+!CHECK-NEXT: } -+!CHECK-NEXT: omp.terminator -+!CHECK-NEXT: } ++! CHECK-LABEL: func.func @_QPtarget_teams_distribute_parallel_do_spmd() { ++subroutine target_teams_distribute_parallel_do_spmd() ++ ! CHECK: omp.target ++ ! CHECK-SAME: host_eval({{.*}}) ++ !$omp target teams distribute parallel do ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end target teams distribute parallel do ++end subroutine target_teams_distribute_parallel_do_spmd + -+end subroutine omp_target_target_do_simd -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/Todo/omp-do-simd-aligned.f90 llvm-project/flang/test/Lower/OpenMP/Todo/omp-do-simd-aligned.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/Todo/omp-do-simd-aligned.f90 2024-06-12 10:43:12.676209906 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/Todo/omp-do-simd-aligned.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -1,11 +1,11 @@ - ! 
This test checks lowering of OpenMP do simd aligned() pragma - --! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s --! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s -+! RUN: %not_todo_cmd bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s -+! RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s - subroutine testDoSimdAligned(int_array) - use iso_c_binding - type(c_ptr) :: int_array --!CHECK: not yet implemented: Unhandled clause ALIGNED in DO SIMD construct -+!CHECK: not yet implemented: Unhandled clause ALIGNED in SIMD construct - !$omp do simd aligned(int_array) - do index_ = 1, 10 - call c_test_call(int_array) -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90 llvm-project/flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90 2024-06-12 10:43:12.676209906 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -4,7 +4,7 @@ - ! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s - subroutine testDoSimdLinear(int_array) - integer :: int_array(*) --!CHECK: not yet implemented: Unhandled clause LINEAR in DO SIMD construct -+!CHECK: not yet implemented: Unhandled clause LINEAR in DO construct - !$omp do simd linear(int_array) - do index_ = 1, 10 - end do -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/Todo/omp-do-simd-safelen.f90 llvm-project/flang/test/Lower/OpenMP/Todo/omp-do-simd-safelen.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/Todo/omp-do-simd-safelen.f90 2024-06-12 10:43:12.676209906 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/Todo/omp-do-simd-safelen.f90 1969-12-31 18:00:00.000000000 -0600 -@@ -1,14 +0,0 @@ --! This test checks lowering of OpenMP do simd safelen() pragma -- --! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s --! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s --subroutine testDoSimdSafelen(int_array) -- integer :: int_array(*) --!CHECK: not yet implemented: Unhandled clause SAFELEN in DO SIMD construct --!$omp do simd safelen(4) -- do index_ = 1, 10 -- end do --!$omp end do simd -- --end subroutine testDoSimdSafelen -- -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/Todo/omp-do-simd-simdlen.f90 llvm-project/flang/test/Lower/OpenMP/Todo/omp-do-simd-simdlen.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/Todo/omp-do-simd-simdlen.f90 2024-06-12 10:43:12.676209906 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/Todo/omp-do-simd-simdlen.f90 1969-12-31 18:00:00.000000000 -0600 -@@ -1,14 +0,0 @@ --! This test checks lowering of OpenMP do simd simdlen() pragma -- ++! CHECK-LABEL: func.func @_QPtarget_teams_distribute_parallel_do_simd_spmd() { ++subroutine target_teams_distribute_parallel_do_simd_spmd() ++ ! CHECK: omp.target ++ ! 
CHECK-SAME: host_eval({{.*}}) ++ !$omp target teams distribute parallel do simd ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end target teams distribute parallel do simd ++end subroutine target_teams_distribute_parallel_do_simd_spmd +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/Todo/reduction-teams.f90 llvm-project-aso/flang/test/Lower/OpenMP/Todo/reduction-teams.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/Todo/reduction-teams.f90 2024-08-27 20:36:25.292172480 -0500 ++++ llvm-project-aso/flang/test/Lower/OpenMP/Todo/reduction-teams.f90 1969-12-31 18:00:00.000000000 -0600 +@@ -1,12 +0,0 @@ -! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s -! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s --subroutine testDoSimdSimdlen(int_array) -- integer :: int_array(*) --!CHECK: not yet implemented: Unhandled clause SIMDLEN in DO SIMD construct --!$omp do simd simdlen(4) -- do index_ = 1, 10 -- end do --!$omp end do simd - --end subroutine testDoSimdSimdlen +-! CHECK: not yet implemented: Unhandled clause REDUCTION in TEAMS construct +-subroutine reduction_teams() +- integer :: i +- i = 0 - -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/Todo/reduction-teams.f90 llvm-project/flang/test/Lower/OpenMP/Todo/reduction-teams.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/Todo/reduction-teams.f90 2023-08-31 11:50:50.526255637 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/Todo/reduction-teams.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -1,7 +1,9 @@ --! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s --! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s -+! RUN: bbc -emit-fir -fopenmp -o - %s | FileCheck %s -+! RUN: %flang_fc1 -emit-fir -fopenmp -o - %s | FileCheck %s -+! XFAIL: * +- !$omp teams reduction(+:i) +- i = i + 1 +- !$omp end teams +-end subroutine reduction_teams +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90 llvm-project-aso/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90 2024-11-23 20:25:26.851275134 -0600 ++++ llvm-project-aso/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -14,9 +14,9 @@ + real, pointer :: pa(:) + type(c_ptr) :: cptr + +- !$omp target data use_device_ptr(pa, cptr, array) +- !$omp end target data +- end subroutine ++ !$omp target data use_device_ptr(pa, cptr, array) ++ !$omp end target data ++end subroutine --! CHECK: not yet implemented: Unhandled clause REDUCTION in TEAMS construct -+! CHECK: omp.teams -+! CHECK-SAME: reduction - subroutine reduction_teams() - integer :: i - i = 0 -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/wsloop-chunks.f90 llvm-project/flang/test/Lower/OpenMP/wsloop-chunks.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/wsloop-chunks.f90 2024-06-12 10:43:12.680209864 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/wsloop-chunks.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -16,12 +16,12 @@ - do i=1, 9 - print*, i - --! CHECK: %[[VAL_2:.*]] = arith.constant 1 : i32 --! CHECK: %[[VAL_3:.*]] = arith.constant 9 : i32 --! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 --! CHECK: %[[VAL_5:.*]] = arith.constant 4 : i32 --! CHECK: omp.wsloop schedule(static = %[[VAL_5]] : i32) nowait { --! 
CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) { -+! CHECK: %[[VAL_2:.*]] = arith.constant 4 : i32 -+! CHECK: %[[VAL_3:.*]] = arith.constant 1 : i32 -+! CHECK: %[[VAL_4:.*]] = arith.constant 9 : i32 -+! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 -+! CHECK: omp.wsloop schedule(static = %[[VAL_2]] : i32) nowait { -+! CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) { - ! CHECK: fir.store %[[ARG0]] to %[[STORE_IV:.*]]#1 : !fir.ref - ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[STORE_IV]]#0 : !fir.ref - ! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 -@@ -37,12 +37,12 @@ - do i=1, 9 - print*, i*2 - --! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 --! CHECK: %[[VAL_15:.*]] = arith.constant 9 : i32 --! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 --! CHECK: %[[VAL_17:.*]] = arith.constant 4 : i32 --! CHECK: omp.wsloop schedule(static = %[[VAL_17]] : i32) nowait { --! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) { -+! CHECK: %[[VAL_14:.*]] = arith.constant 4 : i32 -+! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32 -+! CHECK: %[[VAL_16:.*]] = arith.constant 9 : i32 -+! CHECK: %[[VAL_17:.*]] = arith.constant 1 : i32 -+! CHECK: omp.wsloop schedule(static = %[[VAL_14]] : i32) nowait { -+! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]]) : i32 = (%[[VAL_15]]) to (%[[VAL_16]]) inclusive step (%[[VAL_17]]) { - ! CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]]#1 : !fir.ref - ! CHECK: %[[VAL_24:.*]] = arith.constant 2 : i32 - ! CHECK: %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]]#0 : !fir.ref -@@ -64,12 +64,12 @@ - !$OMP END DO NOWAIT - ! CHECK: %[[VAL_28:.*]] = arith.constant 6 : i32 - ! CHECK: hlfir.assign %[[VAL_28]] to %[[VAL_0]]#0 : i32, !fir.ref --! CHECK: %[[VAL_29:.*]] = arith.constant 1 : i32 --! CHECK: %[[VAL_30:.*]] = arith.constant 9 : i32 --! CHECK: %[[VAL_31:.*]] = arith.constant 1 : i32 --! CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_0]]#0 : !fir.ref --! CHECK: omp.wsloop schedule(static = %[[VAL_32]] : i32) nowait { --! CHECK-NEXT: omp.loop_nest (%[[ARG2:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) { -+! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_0]]#0 : !fir.ref -+! CHECK: %[[VAL_30:.*]] = arith.constant 1 : i32 -+! CHECK: %[[VAL_31:.*]] = arith.constant 9 : i32 -+! CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32 -+! CHECK: omp.wsloop schedule(static = %[[VAL_29]] : i32) nowait { -+! CHECK-NEXT: omp.loop_nest (%[[ARG2:.*]]) : i32 = (%[[VAL_30]]) to (%[[VAL_31]]) inclusive step (%[[VAL_32]]) { - ! CHECK: fir.store %[[ARG2]] to %[[STORE_IV2:.*]]#1 : !fir.ref - ! CHECK: %[[VAL_39:.*]] = arith.constant 3 : i32 - ! CHECK: %[[LOAD_IV2:.*]] = fir.load %[[STORE_IV2]]#0 : !fir.ref -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 llvm-project/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 2024-06-12 10:43:12.680209864 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -73,14 +73,14 @@ - ! CHECK: omp.parallel { - ! CHECK: %[[VAL_6:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}} - ! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) --! CHECK: %[[VAL_8:.*]] = arith.constant 0 : i32 --! 
CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 --! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 --! CHECK: %[[VAL_11:.*]] = fir.embox %[[VAL_5]]#0(%[[VAL_4]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> --! CHECK: %[[VAL_12:.*]] = fir.alloca !fir.box> --! CHECK: fir.store %[[VAL_11]] to %[[VAL_12]] : !fir.ref>> --! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_2xi32 %[[VAL_12]] -> %[[VAL_13:.*]] : !fir.ref>>) { --! CHECK-NEXT: omp.loop_nest (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { -+! CHECK: %[[VAL_8:.*]] = fir.embox %[[VAL_5]]#0(%[[VAL_4]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> -+! CHECK: %[[VAL_9:.*]] = fir.alloca !fir.box> -+! CHECK: fir.store %[[VAL_8]] to %[[VAL_9]] : !fir.ref>> -+! CHECK: %[[VAL_10:.*]] = arith.constant 0 : i32 -+! CHECK: %[[VAL_11:.*]] = arith.constant 10 : i32 -+! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 -+! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_2xi32 %[[VAL_9]] -> %[[VAL_13:.*]] : !fir.ref>>) { -+! CHECK-NEXT: omp.loop_nest (%[[VAL_14:.*]]) : i32 = (%[[VAL_10]]) to (%[[VAL_11]]) inclusive step (%[[VAL_12]]) { - ! CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_13]] {uniq_name = "_QFEr"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) - ! CHECK: fir.store %[[VAL_14]] to %[[VAL_7]]#1 : !fir.ref - ! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref>> -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 llvm-project/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 2024-06-12 10:43:12.680209864 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 2024-06-12 10:44:09.355614196 -0500 -@@ -79,13 +79,13 @@ - ! CHECK: omp.parallel { - ! CHECK: %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}} - ! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFFreduceEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) --! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i32 --! CHECK: %[[VAL_7:.*]] = arith.constant 10 : i32 --! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 --! CHECK: %[[VAL_9:.*]] = fir.alloca !fir.box> --! CHECK: fir.store %[[VAL_3]]#1 to %[[VAL_9]] : !fir.ref>> --! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_Uxf64 %[[VAL_9]] -> %[[VAL_10:.*]] : !fir.ref>>) { --! CHECK-NEXT: omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { -+! CHECK: %[[VAL_6:.*]] = fir.alloca !fir.box> -+! CHECK: fir.store %[[VAL_3]]#1 to %[[VAL_6]] : !fir.ref>> -+! CHECK: %[[VAL_7:.*]] = arith.constant 0 : i32 -+! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32 -+! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -+! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_Uxf64 %[[VAL_6]] -> %[[VAL_10:.*]] : !fir.ref>>) { -+! CHECK-NEXT: omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { - ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {fortran_attrs = {{.*}}, uniq_name = "_QFFreduceEr"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) - ! CHECK: fir.store %[[VAL_11]] to %[[VAL_5]]#1 : !fir.ref - ! 
CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/wsloop-reduction-array.f90 llvm-project/flang/test/Lower/OpenMP/wsloop-reduction-array.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/wsloop-reduction-array.f90 2024-06-12 10:43:12.680209864 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/wsloop-reduction-array.f90 2024-06-12 10:44:09.355614196 -0500 -@@ -73,14 +73,14 @@ - ! CHECK: omp.parallel { - ! CHECK: %[[VAL_6:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}} - ! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) --! CHECK: %[[VAL_8:.*]] = arith.constant 0 : i32 --! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 --! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 --! CHECK: %[[VAL_11:.*]] = fir.embox %[[VAL_5]]#0(%[[VAL_4]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> --! CHECK: %[[VAL_12:.*]] = fir.alloca !fir.box> --! CHECK: fir.store %[[VAL_11]] to %[[VAL_12]] : !fir.ref>> --! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_2xi32 %[[VAL_12]] -> %[[VAL_13:.*]] : !fir.ref>>) { --! CHECK-NEXT: omp.loop_nest (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { -+! CHECK: %[[VAL_8:.*]] = fir.embox %[[VAL_5]]#0(%[[VAL_4]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> -+! CHECK: %[[VAL_9:.*]] = fir.alloca !fir.box> -+! CHECK: fir.store %[[VAL_8]] to %[[VAL_9]] : !fir.ref>> -+! CHECK: %[[VAL_10:.*]] = arith.constant 0 : i32 -+! CHECK: %[[VAL_11:.*]] = arith.constant 10 : i32 -+! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 -+! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_2xi32 %[[VAL_9]] -> %[[VAL_13:.*]] : !fir.ref>>) { -+! CHECK-NEXT: omp.loop_nest (%[[VAL_14:.*]]) : i32 = (%[[VAL_10]]) to (%[[VAL_11]]) inclusive step (%[[VAL_12]]) { - ! CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_13]] {uniq_name = "_QFEr"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) - ! CHECK: fir.store %[[VAL_14]] to %[[VAL_7]]#1 : !fir.ref - ! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/wsloop-reduction-multiple-clauses.f90 llvm-project/flang/test/Lower/OpenMP/wsloop-reduction-multiple-clauses.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/wsloop-reduction-multiple-clauses.f90 2024-06-12 10:43:12.680209864 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/wsloop-reduction-multiple-clauses.f90 2024-06-12 10:44:09.355614196 -0500 -@@ -109,14 +109,14 @@ - ! CHECK: omp.parallel { - ! CHECK: %[[VAL_11:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}} - ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) --! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i32 --! CHECK: %[[VAL_14:.*]] = arith.constant 10 : i32 -+! CHECK: %[[VAL_13:.*]] = fir.embox %[[VAL_4]]#0(%[[VAL_3]]) : (!fir.ref>, !fir.shape<2>) -> !fir.box> -+! CHECK: %[[VAL_14:.*]] = fir.alloca !fir.box> -+! CHECK: fir.store %[[VAL_13]] to %[[VAL_14]] : !fir.ref>> - ! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32 --! CHECK: %[[VAL_16:.*]] = fir.embox %[[VAL_4]]#0(%[[VAL_3]]) : (!fir.ref>, !fir.shape<2>) -> !fir.box> --! CHECK: %[[VAL_17:.*]] = fir.alloca !fir.box> --! CHECK: fir.store %[[VAL_16]] to %[[VAL_17]] : !fir.ref>> --! CHECK: omp.wsloop reduction(@add_reduction_f64 %[[VAL_8]]#0 -> %[[VAL_18:.*]] : !fir.ref, byref @add_reduction_byref_box_3x3xf64 %[[VAL_17]] -> %[[VAL_19:.*]] : !fir.ref>>) { --! 
CHECK: omp.loop_nest (%[[VAL_20:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { -+! CHECK: %[[VAL_16:.*]] = arith.constant 10 : i32 -+! CHECK: %[[VAL_17:.*]] = arith.constant 1 : i32 -+! CHECK: omp.wsloop reduction(@add_reduction_f64 %[[VAL_8]]#0 -> %[[VAL_18:.*]] : !fir.ref, byref @add_reduction_byref_box_3x3xf64 %[[VAL_14]] -> %[[VAL_19:.*]] : !fir.ref>>) { -+! CHECK: omp.loop_nest (%[[VAL_20:.*]]) : i32 = (%[[VAL_15]]) to (%[[VAL_16]]) inclusive step (%[[VAL_17]]) { - ! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFEscalar"} : (!fir.ref) -> (!fir.ref, !fir.ref) - ! CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFEarray"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) - ! CHECK: fir.store %[[VAL_20]] to %[[VAL_12]]#1 : !fir.ref -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Transforms/DoConcurrent/basic_device.f90 llvm-project/flang/test/Transforms/DoConcurrent/basic_device.f90 ---- llvm-project.orig/flang/test/Transforms/DoConcurrent/basic_device.f90 1969-12-31 18:00:00.000000000 -0600 -+++ llvm-project/flang/test/Transforms/DoConcurrent/basic_device.f90 2024-06-12 10:44:09.355614196 -0500 + !CHECK: func.func @{{.*}}mix_use_device_ptr_and_addr() + !CHECK: omp.target_data use_device_addr(%{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}} : !fir.ref>>>, !fir.ref>>>, !fir.llvm_ptr>>, !fir.llvm_ptr>>) use_device_ptr({{.*}} : !fir.ref>) { +@@ -26,9 +26,9 @@ + real, pointer :: pa(:) + type(c_ptr) :: cptr + +- !$omp target data use_device_ptr(pa, cptr) use_device_addr(array) +- !$omp end target data +- end subroutine ++ !$omp target data use_device_ptr(pa, cptr) use_device_addr(array) ++ !$omp end target data ++end subroutine + + !CHECK: func.func @{{.*}}only_use_device_addr() + !CHECK: omp.target_data use_device_addr(%{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}} : !fir.ref>>>, !fir.ref>, !fir.ref>>>, !fir.llvm_ptr>>, !fir.llvm_ptr>>) { +@@ -38,9 +38,9 @@ + real, pointer :: pa(:) + type(c_ptr) :: cptr + +- !$omp target data use_device_addr(pa, cptr, array) +- !$omp end target data +- end subroutine ++ !$omp target data use_device_addr(pa, cptr, array) ++ !$omp end target data ++end subroutine + + !CHECK: func.func @{{.*}}mix_use_device_ptr_and_addr_and_map() + !CHECK: omp.target_data map_entries(%{{.*}}, %{{.*}} : !fir.ref, !fir.ref) use_device_addr(%{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}} : !fir.ref>>>, !fir.ref>>>, !fir.llvm_ptr>>, !fir.llvm_ptr>>) use_device_ptr(%{{.*}} : !fir.ref>) { +@@ -51,9 +51,9 @@ + real, pointer :: pa(:) + type(c_ptr) :: cptr + +- !$omp target data use_device_ptr(pa, cptr) use_device_addr(array) map(tofrom: i, j) +- !$omp end target data +- end subroutine ++ !$omp target data use_device_ptr(pa, cptr) use_device_addr(array) map(tofrom: i, j) ++ !$omp end target data ++end subroutine + + !CHECK: func.func @{{.*}}only_use_map() + !CHECK: omp.target_data map_entries(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : !fir.ref>>>, !fir.ref>, !fir.ref>>>, !fir.llvm_ptr>>, !fir.llvm_ptr>>) { +@@ -63,6 +63,6 @@ + real, pointer :: pa(:) + type(c_ptr) :: cptr + +- !$omp target data map(pa, cptr, array) +- !$omp end target data +- end subroutine ++ !$omp target data map(pa, cptr, array) ++ !$omp end target data ++end subroutine +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Semantics/OpenMP/combined-constructs.f90 
llvm-project-aso/flang/test/Semantics/OpenMP/combined-constructs.f90 +--- llvm-project-aso-orig/flang/test/Semantics/OpenMP/combined-constructs.f90 2024-11-23 20:25:26.855275120 -0600 ++++ llvm-project-aso/flang/test/Semantics/OpenMP/combined-constructs.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -1,4 +1,4 @@ +-! RUN: %python %S/../test_errors.py %s %flang -fopenmp ++! RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=11 + + program main + implicit none +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Semantics/OpenMP/ordered01.f90 llvm-project-aso/flang/test/Semantics/OpenMP/ordered01.f90 +--- llvm-project-aso-orig/flang/test/Semantics/OpenMP/ordered01.f90 2024-11-14 15:28:41.138642459 -0600 ++++ llvm-project-aso/flang/test/Semantics/OpenMP/ordered01.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -1,4 +1,4 @@ +-! RUN: %python %S/../test_errors.py %s %flang -fopenmp ++! RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51 + ! OpenMP Version 5.1 + ! Check OpenMP construct validity for the following directives: + ! 2.19.9 Ordered Construct +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Semantics/OpenMP/ordered03.f90 llvm-project-aso/flang/test/Semantics/OpenMP/ordered03.f90 +--- llvm-project-aso-orig/flang/test/Semantics/OpenMP/ordered03.f90 2024-11-14 15:28:41.138642459 -0600 ++++ llvm-project-aso/flang/test/Semantics/OpenMP/ordered03.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -1,4 +1,4 @@ +-! RUN: %python %S/../test_errors.py %s %flang -fopenmp ++! RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51 + ! OpenMP Version 5.1 + ! Check OpenMP construct validity for the following directives: + ! 2.19.9 Ordered Construct +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/basic_device.f90 llvm-project-aso/flang/test/Transforms/DoConcurrent/basic_device.f90 +--- llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/basic_device.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Transforms/DoConcurrent/basic_device.f90 2024-11-23 20:39:47.192175322 -0600 @@ -0,0 +1,86 @@ +! Tests mapping of a basic `do concurrent` loop to +! `!$omp target teams distribute parallel do`. @@ -9868,7 +9796,19 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Transforms/DoConcurre + + ! CHECK-NOT: fir.do_loop + -+ ! CHECK-DAG: %[[I_MAP_INFO:.*]] = omp.map.info var_ptr(%[[I_ORIG_DECL]]#0 ++ ! CHECK: %[[DUPLICATED_C1:.*]] = arith.constant 1 : i32 ++ ! CHECK: %[[DUPLICATED_LB:.*]] = fir.convert %[[DUPLICATED_C1]] : (i32) -> index ++ ! CHECK: %[[DUPLICATED_C10:.*]] = arith.constant 10 : i32 ++ ! CHECK: %[[DUPLICATED_UB:.*]] = fir.convert %[[DUPLICATED_C10]] : (i32) -> index ++ ! CHECK: %[[DUPLICATED_STEP:.*]] = arith.constant 1 : index ++ ++ ! CHECK: %[[C1:.*]] = arith.constant 1 : i32 ++ ! CHECK: %[[HOST_LB:.*]] = fir.convert %[[C1]] : (i32) -> index ++ ! CHECK: %[[C10:.*]] = arith.constant 10 : i32 ++ ! CHECK: %[[HOST_UB:.*]] = fir.convert %[[C10]] : (i32) -> index ++ ! CHECK: %[[HOST_STEP:.*]] = arith.constant 1 : index ++ ++ ! CHECK-DAG: %[[I_MAP_INFO:.*]] = omp.map.info var_ptr(%[[I_ORIG_DECL]]#1 + ! CHECK: %[[C0:.*]] = arith.constant 0 : index + ! CHECK: %[[UPPER_BOUND:.*]] = arith.subi %[[A_EXTENT]], %[[C0]] : index + @@ -9876,32 +9816,22 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Transforms/DoConcurre + ! CHECK-SAME: upper_bound(%[[UPPER_BOUND]] : index) + ! CHECK-SAME: extent(%[[A_EXTENT]] : index) + -+ ! 
CHECK-DAG: %[[A_MAP_INFO:.*]] = omp.map.info var_ptr(%[[A_ORIG_DECL]]#0 : {{[^(]+}}) ++ ! CHECK-DAG: %[[A_MAP_INFO:.*]] = omp.map.info var_ptr(%[[A_ORIG_DECL]]#1 : {{[^(]+}}) + ! CHECK-SAME: map_clauses(implicit, tofrom) capture(ByRef) bounds(%[[A_BOUNDS]]) + -+ ! CHECK: %[[TRIP_COUNT:.*]] = arith.muli %{{.*}}, %{{.*}} : i64 -+ -+ ! CHECK: omp.target trip_count(%[[TRIP_COUNT]] : i64) ++ ! CHECK: omp.target ++ ! CHECK-SAME: host_eval(%[[HOST_LB]] -> %[[LB:[[:alnum:]]+]], %[[HOST_UB]] -> %[[UB:[[:alnum:]]+]], %[[HOST_STEP]] -> %[[STEP:[[:alnum:]]+]] : index, index, index) + ! CHECK-SAME: map_entries(%[[I_MAP_INFO]] -> %[[I_ARG:[[:alnum:]]+]], + ! CHECK-SAME: %[[A_MAP_INFO]] -> %[[A_ARG:.[[:alnum:]]+]] + -+ ! CHECK-NEXT: ^{{.*}}(%[[I_ARG]]: !fir.ref, %[[A_ARG]]: !fir.ref>): -+ + ! CHECK: %[[A_DEV_DECL:.*]]:2 = hlfir.declare %[[A_ARG]] + ! CHECK: omp.teams { ++ ! CHECK-NEXT: omp.parallel { + + ! CHECK-NEXT: %[[ITER_VAR:.*]] = fir.alloca i32 {bindc_name = "i"} + ! CHECK-NEXT: %[[BINDING:.*]]:2 = hlfir.declare %[[ITER_VAR]] {uniq_name = "_QFEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) + -+ ! CHECK: %[[C1:.*]] = arith.constant 1 : i32 -+ ! CHECK: %[[LB:.*]] = fir.convert %[[C1]] : (i32) -> index -+ ! CHECK: %[[C10:.*]] = arith.constant 10 : i32 -+ ! CHECK: %[[UB:.*]] = fir.convert %[[C10]] : (i32) -> index -+ ! CHECK: %[[STEP:.*]] = arith.constant 1 : index -+ + ! CHECK-NEXT: omp.distribute { -+ ! CHECK-NEXT: omp.parallel { -+ + ! CHECK-NEXT: omp.wsloop { + + ! CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]]) : index = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { @@ -9915,12 +9845,10 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Transforms/DoConcurre + ! CHECK-NEXT: omp.yield + ! CHECK-NEXT: } + ++ ! CHECK-NEXT: } {omp.composite} ++ ! CHECK-NEXT: } {omp.composite} + ! CHECK-NEXT: omp.terminator -+ ! CHECK-NEXT: } -+ ! CHECK-NEXT: omp.terminator -+ ! CHECK-NEXT: } -+ ! CHECK-NEXT: omp.terminator -+ ! CHECK-NEXT: } ++ ! CHECK-NEXT: } {omp.composite} + ! CHECK-NEXT: omp.terminator + ! CHECK-NEXT: } + ! CHECK-NEXT: omp.terminator @@ -9931,10 +9859,10 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Transforms/DoConcurre + + ! CHECK-NOT: fir.do_loop +end program do_concurrent_basic -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Transforms/DoConcurrent/basic_host.f90 llvm-project/flang/test/Transforms/DoConcurrent/basic_host.f90 ---- llvm-project.orig/flang/test/Transforms/DoConcurrent/basic_host.f90 1969-12-31 18:00:00.000000000 -0600 -+++ llvm-project/flang/test/Transforms/DoConcurrent/basic_host.f90 2024-06-12 10:44:09.355614196 -0500 -@@ -0,0 +1,50 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/basic_host.f90 llvm-project-aso/flang/test/Transforms/DoConcurrent/basic_host.f90 +--- llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/basic_host.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Transforms/DoConcurrent/basic_host.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,49 @@ +! Tests mapping of a basic `do concurrent` loop to `!$omp parallel do`. + +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=host %s -o - \ @@ -9974,7 +9902,6 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Transforms/DoConcurre + ! CHECK-NEXT: hlfir.assign %[[IV_VAL1]] to %[[ARR_ACCESS]] : i32, !fir.ref + ! CHECK-NEXT: omp.yield + ! CHECK-NEXT: } -+ ! CHECK-NEXT: omp.terminator + ! CHECK-NEXT: } + + ! 
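! Illustrative sketch (not part of this patch): the host mapping that
! basic_host.f90 checks. Under -fdo-concurrent-parallel=host the first loop is
! expected to behave like the hand-written worksharing loop that follows.
! Variable names are hypothetical; -fopenmp is assumed.
program do_concurrent_host_sketch
  implicit none
  integer :: i
  integer :: a(10), b(10)

  ! Mapped form: rewritten by the pass to omp.parallel + omp.wsloop.
  do concurrent (i = 1:10)
    a(i) = i
  end do

  ! Hand-written equivalent of what the host mapping targets.
  !$omp parallel do
  do i = 1, 10
    b(i) = i
  end do
  !$omp end parallel do

  print *, all(a == b)   ! expected to print T
end program do_concurrent_host_sketch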
CHECK-NEXT: omp.terminator @@ -9985,10 +9912,10 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Transforms/DoConcurre + + ! CHECK-NOT: fir.do_loop +end program do_concurrent_basic -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Transforms/DoConcurrent/basic_host.mlir llvm-project/flang/test/Transforms/DoConcurrent/basic_host.mlir ---- llvm-project.orig/flang/test/Transforms/DoConcurrent/basic_host.mlir 1969-12-31 18:00:00.000000000 -0600 -+++ llvm-project/flang/test/Transforms/DoConcurrent/basic_host.mlir 2024-06-12 10:44:09.355614196 -0500 -@@ -0,0 +1,63 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/basic_host.mlir llvm-project-aso/flang/test/Transforms/DoConcurrent/basic_host.mlir +--- llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/basic_host.mlir 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Transforms/DoConcurrent/basic_host.mlir 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,62 @@ +// Tests mapping of a basic `do concurrent` loop to `!$omp parallel do`. + +// RUN: fir-opt --fopenmp-do-concurrent-conversion="map-to=host" %s | FileCheck %s @@ -10033,7 +9960,6 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Transforms/DoConcurre + // CHECK-NEXT: hlfir.assign %[[IV_VAL1]] to %[[ARR_ACCESS]] : i32, !fir.ref + // CHECK-NEXT: omp.yield + // CHECK-NEXT: } -+ // CHECK-NEXT: omp.terminator + // CHECK-NEXT: } + + // CHECK-NEXT: omp.terminator @@ -10052,10 +9978,533 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Transforms/DoConcurre + + return + } -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/tools/bbc/bbc.cpp llvm-project/flang/tools/bbc/bbc.cpp ---- llvm-project.orig/flang/tools/bbc/bbc.cpp 2024-06-12 10:43:12.700209654 -0500 -+++ llvm-project/flang/tools/bbc/bbc.cpp 2024-06-12 10:44:09.355614196 -0500 -@@ -139,6 +139,12 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/locally_destroyed_temp.f90 llvm-project-aso/flang/test/Transforms/DoConcurrent/locally_destroyed_temp.f90 +--- llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/locally_destroyed_temp.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Transforms/DoConcurrent/locally_destroyed_temp.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,76 @@ ++! Tests that locally destroyed values in a `do concurrent` loop are properly ++! handled. Locally destroyed values are those values for which the Fortran runtime ++! calls `@_FortranADestroy` inside the loops body. If these values are allocated ++! outside the loop, and the loop is mapped to OpenMP, then a runtime error would ++! occur due to multiple teams trying to access the same allocation. ++ ++! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=host %s -o - \ ++! RUN: | FileCheck %s --check-prefixes=COMMON ++ ++! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=device %s -o - \ ++! 
RUN: | FileCheck %s --check-prefixes=COMMON,DEVICE ++ ++module struct_mod ++ type test_struct ++ integer, allocatable :: x_ ++ end type ++ ++ interface test_struct ++ pure module function construct_from_components(x) result(struct) ++ implicit none ++ integer, intent(in) :: x ++ type(test_struct) struct ++ end function ++ end interface ++end module ++ ++submodule(struct_mod) struct_sub ++ implicit none ++ ++contains ++ module procedure construct_from_components ++ struct%x_ = x ++ end procedure ++end submodule struct_sub ++ ++program main ++ use struct_mod, only : test_struct ++ ++ implicit none ++ type(test_struct), dimension(10) :: a ++ integer :: i ++ integer :: total ++ ++ do concurrent (i=1:10) ++ a(i) = test_struct(i) ++ end do ++ ++ do i=1,10 ++ total = total + a(i)%x_ ++ end do ++ ++ print *, "total =", total ++end program main ++ ++! DEVICE: omp.target {{.*}} { ++! DEVICE: omp.teams { ++! COMMON: omp.parallel { ++! COMMON: %[[LOCAL_TEMP:.*]] = fir.alloca !fir.type<_QMstruct_modTtest_struct{x_:!fir.box>}> {bindc_name = ".result"} ++! DEVICE: omp.distribute { ++! COMMON: omp.wsloop { ++! COMMON: omp.loop_nest {{.*}} { ++! COMMON: %[[TEMP_VAL:.*]] = fir.call @_QMstruct_modPconstruct_from_components ++! COMMON: fir.save_result %[[TEMP_VAL]] to %[[LOCAL_TEMP]] ++! COMMON: %[[EMBOXED_LOCAL:.*]] = fir.embox %[[LOCAL_TEMP]] ++! COMMON: %[[CONVERTED_LOCAL:.*]] = fir.convert %[[EMBOXED_LOCAL]] ++! COMMON: fir.call @_FortranADestroy(%[[CONVERTED_LOCAL]]) ++! COMMON: omp.yield ++! COMMON: } ++! COMMON: } ++! DEVICE: } ++! COMMON: omp.terminator ++! COMMON: } ++! DEVICE: omp.terminator ++! DEVICE: } ++! DEVICE: omp.terminator ++! DEVICE: } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/loop_nest_test.f90 llvm-project-aso/flang/test/Transforms/DoConcurrent/loop_nest_test.f90 +--- llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/loop_nest_test.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Transforms/DoConcurrent/loop_nest_test.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,87 @@ ++! Tests loop-nest detection algorithm for do-concurrent mapping. ++ ++! REQUIRES: asserts ++ ++! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=host \ ++! RUN: -mmlir -debug %s -o - 2> %t.log || true ++ ++! RUN: FileCheck %s < %t.log ++ ++program main ++ implicit none ++ ++contains ++ ++subroutine foo(n) ++ implicit none ++ integer :: n, m ++ integer :: i, j, k ++ integer :: x ++ integer, dimension(n) :: a ++ integer, dimension(n, n, n) :: b ++ ++ ! CHECK: Loop pair starting at location ++ ! CHECK: loc("{{.*}}":[[# @LINE + 1]]:{{.*}}) is perfectly nested ++ do concurrent(i=1:n, j=1:bar(n*m, n/m)) ++ a(i) = n ++ end do ++ ++ ! CHECK: Loop pair starting at location ++ ! CHECK: loc("{{.*}}":[[# @LINE + 1]]:{{.*}}) is perfectly nested ++ do concurrent(i=bar(n, x):n, j=1:bar(n*m, n/m)) ++ a(i) = n ++ end do ++ ++ ! CHECK: Loop pair starting at location ++ ! CHECK: loc("{{.*}}":[[# @LINE + 1]]:{{.*}}) is not perfectly nested ++ do concurrent(i=bar(n, x):n) ++ do concurrent(j=1:bar(n*m, n/m)) ++ a(i) = n ++ end do ++ end do ++ ++ ! CHECK: Loop pair starting at location ++ ! CHECK: loc("{{.*}}":[[# @LINE + 1]]:{{.*}}) is not perfectly nested ++ do concurrent(i=1:n) ++ x = 10 ++ do concurrent(j=1:m) ++ b(i,j,k) = i * j + k ++ end do ++ end do ++ ++ ! CHECK: Loop pair starting at location ++ ! 
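! Illustrative sketch (not part of this patch): the nesting property that
! loop_nest_test.f90 probes. A single do concurrent header with several ranges
! counts as one perfect nest, while an extra statement between two separately
! written do concurrent loops makes the pair imperfectly nested, so only the
! outer loop remains a mapping candidate. Names and bounds are hypothetical.
subroutine nesting_sketch(n, m, b)
  implicit none
  integer, intent(in) :: n, m
  integer, intent(inout) :: b(n, m)
  integer :: i, j, x

  ! Perfectly nested: one header, two ranges.
  do concurrent (i = 1:n, j = 1:m)
    b(i, j) = i + j
  end do

  ! Not perfectly nested: the assignment to x sits between the two loops.
  do concurrent (i = 1:n)
    x = 10
    do concurrent (j = 1:m)
      b(i, j) = i * j
    end do
  end do
end subroutine nesting_sketch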
CHECK: loc("{{.*}}":[[# @LINE + 1]]:{{.*}}) is not perfectly nested ++ do concurrent(i=1:n) ++ do concurrent(j=1:m) ++ b(i,j,k) = i * j + k ++ end do ++ x = 10 ++ end do ++ ++ ! CHECK: Loop pair starting at location ++ ! CHECK: loc("{{.*}}":[[# @LINE + 1]]:{{.*}}) is not perfectly nested ++ do concurrent(i=1:n) ++ do concurrent(j=1:m) ++ b(i,j,k) = i * j + k ++ x = 10 ++ end do ++ end do ++ ++ ! CHECK: Loop pair starting at location ++ ! CHECK: loc("{{.*}}":[[# @LINE + 1]]:{{.*}}) is perfectly nested ++ do concurrent(i=bar(n, x):n, j=1:bar(n*m, n/m), k=1:bar(n*m, bar(n*m, n/m))) ++ a(i) = n ++ end do ++ ++ ++end subroutine ++ ++pure function bar(n, m) ++ implicit none ++ integer, intent(in) :: n, m ++ integer :: bar ++ ++ bar = n + m ++end function ++ ++end program main +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 llvm-project-aso/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 +--- llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,118 @@ ++! Tests mapping of a `do concurrent` loop with multiple iteration ranges. ++ ++! RUN: split-file %s %t ++ ++! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=host %t/multi_range.f90 -o - \ ++! RUN: | FileCheck %s --check-prefixes=HOST,COMMON ++ ++! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=device %t/multi_range.f90 -o - \ ++! RUN: | FileCheck %s --check-prefixes=DEVICE,COMMON ++ ++!--- multi_range.f90 ++program main ++ integer, parameter :: n = 20 ++ integer, parameter :: m = 40 ++ integer, parameter :: l = 60 ++ integer :: a(n, m, l) ++ ++ do concurrent(i=3:n, j=5:m, k=7:l) ++ a(i,j,k) = i * j + k ++ end do ++end ++ ++! COMMON: func.func @_QQmain ++ ++! DEVICE: %[[DUPLICATED_C3:.*]] = arith.constant 3 : i32 ++! DEVICE: %[[DUPLICATED_LB_I:.*]] = fir.convert %[[DUPLICATED_C3]] : (i32) -> index ++! DEVICE: %[[DUPLICATED_C20:.*]] = arith.constant 20 : i32 ++! DEVICE: %[[DUPLICATED_UB_I:.*]] = fir.convert %[[DUPLICATED_C20]] : (i32) -> index ++! DEVICE: %[[DUPLICATED_STEP_I:.*]] = arith.constant 1 : index ++ ++! DEVICE: %[[C3:.*]] = arith.constant 3 : i32 ++! DEVICE: %[[HOST_LB_I:.*]] = fir.convert %[[C3]] : (i32) -> index ++! DEVICE: %[[C20:.*]] = arith.constant 20 : i32 ++! DEVICE: %[[HOST_UB_I:.*]] = fir.convert %[[C20]] : (i32) -> index ++! DEVICE: %[[HOST_STEP_I:.*]] = arith.constant 1 : index ++ ++! DEVICE: %[[C5:.*]] = arith.constant 5 : i32 ++! DEVICE: %[[HOST_LB_J:.*]] = fir.convert %[[C5]] : (i32) -> index ++! DEVICE: %[[C40:.*]] = arith.constant 40 : i32 ++! DEVICE: %[[HOST_UB_J:.*]] = fir.convert %[[C40]] : (i32) -> index ++! DEVICE: %[[HOST_STEP_J:.*]] = arith.constant 1 : index ++ ++! DEVICE: %[[C7:.*]] = arith.constant 7 : i32 ++! DEVICE: %[[HOST_LB_K:.*]] = fir.convert %[[C7]] : (i32) -> index ++! DEVICE: %[[C60:.*]] = arith.constant 60 : i32 ++! DEVICE: %[[HOST_UB_K:.*]] = fir.convert %[[C60]] : (i32) -> index ++! DEVICE: %[[HOST_STEP_K:.*]] = arith.constant 1 : index ++ ++! DEVICE: omp.target host_eval( ++! DEVICE-SAME: %[[HOST_LB_I]] -> %[[LB_I:[[:alnum:]]+]], ++! DEVICE-SAME: %[[HOST_UB_I]] -> %[[UB_I:[[:alnum:]]+]], ++! DEVICE-SAME: %[[HOST_STEP_I]] -> %[[STEP_I:[[:alnum:]]+]], ++! DEVICE-SAME: %[[HOST_LB_J]] -> %[[LB_J:[[:alnum:]]+]], ++! DEVICE-SAME: %[[HOST_UB_J]] -> %[[UB_J:[[:alnum:]]+]], ++! 
DEVICE-SAME: %[[HOST_STEP_J]] -> %[[STEP_J:[[:alnum:]]+]], ++! DEVICE-SAME: %[[HOST_LB_K]] -> %[[LB_K:[[:alnum:]]+]], ++! DEVICE-SAME: %[[HOST_UB_K]] -> %[[UB_K:[[:alnum:]]+]], ++! DEVICE-SAME: %[[HOST_STEP_K]] -> %[[STEP_K:[[:alnum:]]+]] : ++! DEVICE-SAME: index, index, index, index, index, index, index, index, index) ++ ++! DEVICE: omp.teams ++ ++! HOST-NOT: omp.target ++! HOST-NOT: omp.teams ++ ++! COMMON: omp.parallel { ++ ++! COMMON-NEXT: %[[ITER_VAR_I:.*]] = fir.alloca i32 {bindc_name = "i"} ++! COMMON-NEXT: %[[BINDING_I:.*]]:2 = hlfir.declare %[[ITER_VAR_I]] {uniq_name = "_QFEi"} ++ ++! COMMON-NEXT: %[[ITER_VAR_J:.*]] = fir.alloca i32 {bindc_name = "j"} ++! COMMON-NEXT: %[[BINDING_J:.*]]:2 = hlfir.declare %[[ITER_VAR_J]] {uniq_name = "_QFEj"} ++ ++! COMMON-NEXT: %[[ITER_VAR_K:.*]] = fir.alloca i32 {bindc_name = "k"} ++! COMMON-NEXT: %[[BINDING_K:.*]]:2 = hlfir.declare %[[ITER_VAR_K]] {uniq_name = "_QFEk"} ++ ++! HOST: %[[C3:.*]] = arith.constant 3 : i32 ++! HOST: %[[LB_I:.*]] = fir.convert %[[C3]] : (i32) -> index ++! HOST: %[[C20:.*]] = arith.constant 20 : i32 ++! HOST: %[[UB_I:.*]] = fir.convert %[[C20]] : (i32) -> index ++! HOST: %[[STEP_I:.*]] = arith.constant 1 : index ++ ++! HOST: %[[C5:.*]] = arith.constant 5 : i32 ++! HOST: %[[LB_J:.*]] = fir.convert %[[C5]] : (i32) -> index ++! HOST: %[[C40:.*]] = arith.constant 40 : i32 ++! HOST: %[[UB_J:.*]] = fir.convert %[[C40]] : (i32) -> index ++! HOST: %[[STEP_J:.*]] = arith.constant 1 : index ++ ++! HOST: %[[C7:.*]] = arith.constant 7 : i32 ++! HOST: %[[LB_K:.*]] = fir.convert %[[C7]] : (i32) -> index ++! HOST: %[[C60:.*]] = arith.constant 60 : i32 ++! HOST: %[[UB_K:.*]] = fir.convert %[[C60]] : (i32) -> index ++! HOST: %[[STEP_K:.*]] = arith.constant 1 : index ++ ++! DEVICE: omp.distribute ++ ++! COMMON: omp.wsloop { ++! COMMON-NEXT: omp.loop_nest ++! COMMON-SAME: (%[[ARG0:[^[:space:]]+]], %[[ARG1:[^[:space:]]+]], %[[ARG2:[^[:space:]]+]]) ++! COMMON-SAME: : index = (%[[LB_I]], %[[LB_J]], %[[LB_K]]) ++! COMMON-SAME: to (%[[UB_I]], %[[UB_J]], %[[UB_K]]) inclusive ++! COMMON-SAME: step (%[[STEP_I]], %[[STEP_J]], %[[STEP_K]]) { ++ ++! COMMON-NEXT: %[[IV_IDX_I:.*]] = fir.convert %[[ARG0]] ++! COMMON-NEXT: fir.store %[[IV_IDX_I]] to %[[BINDING_I]]#1 ++ ++! COMMON-NEXT: %[[IV_IDX_J:.*]] = fir.convert %[[ARG1]] ++! COMMON-NEXT: fir.store %[[IV_IDX_J]] to %[[BINDING_J]]#1 ++ ++! COMMON-NEXT: %[[IV_IDX_K:.*]] = fir.convert %[[ARG2]] ++! COMMON-NEXT: fir.store %[[IV_IDX_K]] to %[[BINDING_K]]#1 ++ ++! COMMON: omp.yield ++! COMMON-NEXT: } ++! COMMON-NEXT: } ++ ++! HOST-NEXT: omp.terminator ++! HOST-NEXT: } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/non_const_bounds.f90 llvm-project-aso/flang/test/Transforms/DoConcurrent/non_const_bounds.f90 +--- llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/non_const_bounds.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Transforms/DoConcurrent/non_const_bounds.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,44 @@ ++! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=host %s -o - \ ++! RUN: | FileCheck %s ++ ++program main ++ implicit none ++ ++ call foo(10) ++ ++ contains ++ subroutine foo(n) ++ implicit none ++ integer :: n ++ integer :: i ++ integer, dimension(n) :: a ++ ++ do concurrent(i=1:n) ++ a(i) = i ++ end do ++ end subroutine ++ ++end program main ++ ++! CHECK: %[[N_DECL:.*]]:2 = hlfir.declare %{{.*}} dummy_scope %{{.*}} {uniq_name = "_QFFfooEn"} ++! CHECK: fir.load ++! 
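! Illustrative sketch (not part of this patch): the non-constant bound case
! covered by non_const_bounds.f90. The literal lower bound can be
! re-materialized inside the parallel region, while the upper bound n is only
! known at run time and so the value computed outside the loop has to be used.
! Names are hypothetical.
subroutine non_const_bound_sketch(n, a)
  implicit none
  integer, intent(in) :: n
  integer, intent(out) :: a(n)
  integer :: i

  ! Upper bound is a dummy argument, so it is not a compile-time constant.
  do concurrent (i = 1:n)
    a(i) = i
  end do
end subroutine non_const_bound_sketch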
CHECK: %[[N_VAL:.*]] = fir.load %[[N_DECL]]#0 : !fir.ref ++ ++! CHECK: omp.parallel { ++ ++! Verify the constant chain of ops for the lower bound are cloned in the region. ++! CHECK: %[[C1:.*]] = arith.constant 1 : i32 ++! CHECK: %[[LB:.*]] = fir.convert %[[C1]] : (i32) -> index ++ ++! Verify that we restort to using the outside value for the upper bound since it ++! is not originally a constant. ++! CHECK: %[[UB:.*]] = fir.convert %[[N_VAL]] : (i32) -> index ++ ++! CHECK: omp.wsloop { ++! CHECK: omp.loop_nest (%{{.*}}) : index = (%[[LB]]) to (%[[UB]]) inclusive step (%{{.*}}) { ++! CHECK: omp.yield ++! CHECK: } ++! CHECK: } ++! CHECK: omp.terminator ++! CHECK: } ++ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 llvm-project-aso/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 +--- llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,65 @@ ++! Tests that if `do concurrent` is not perfectly nested in its parent loop, that ++! we skip converting the not-perfectly nested `do concurrent` loop. ++ ++ ++! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=host %s -o - \ ++! RUN: | FileCheck %s --check-prefixes=HOST,COMMON ++ ++! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=device %s -o - \ ++! RUN: | FileCheck %s --check-prefixes=DEVICE,COMMON ++ ++program main ++ integer, parameter :: n = 10 ++ integer, parameter :: m = 20 ++ integer, parameter :: l = 30 ++ integer x; ++ integer :: a(n, m, l) ++ ++ do concurrent(i=1:n) ++ x = 10 ++ do concurrent(j=1:m, k=1:l) ++ a(i,j,k) = i * j + k ++ end do ++ end do ++end ++ ++! HOST: %[[ORIG_K_ALLOC:.*]] = fir.alloca i32 {bindc_name = "k"} ++! HOST: %[[ORIG_K_DECL:.*]]:2 = hlfir.declare %[[ORIG_K_ALLOC]] ++ ++! HOST: %[[ORIG_J_ALLOC:.*]] = fir.alloca i32 {bindc_name = "j"} ++! HOST: %[[ORIG_J_DECL:.*]]:2 = hlfir.declare %[[ORIG_J_ALLOC]] ++ ++! DEVICE: omp.target {{.*}}map_entries(%{{[^[:space:]]+}} -> %[[I_ARG:[^,]+]], ++! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[X_ARG:[^,]+]], ++! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[J_ARG:[^,]+]], ++! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[K_ARG:[^,]+]], ++! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[A_ARG:[^:]+]]: ++! DEVICE-SAME: !fir.ref, !fir.ref, !fir.ref, !fir.ref, !fir.ref>) { ++ ++! DEVICE: %[[TARGET_J_DECL:.*]]:2 = hlfir.declare %[[J_ARG]] {uniq_name = "_QFEj"} ++! DEVICE: %[[TARGET_K_DECL:.*]]:2 = hlfir.declare %[[K_ARG]] {uniq_name = "_QFEk"} ++ ++! DEVICE: omp.teams ++ ++! COMMON: omp.parallel { ++ ++! DEVICE: omp.distribute ++ ++! COMMON: omp.wsloop { ++! COMMON: omp.loop_nest ({{[^[:space:]]+}}) {{.*}} { ++! COMMON: fir.do_loop %[[J_IV:.*]] = {{.*}} { ++! COMMON: %[[J_IV_CONV:.*]] = fir.convert %[[J_IV]] : (index) -> i32 ++! HOST: fir.store %[[J_IV_CONV]] to %[[ORIG_J_DECL]]#1 ++! DEVICE: fir.store %[[J_IV_CONV]] to %[[TARGET_J_DECL]]#1 ++ ++! COMMON: fir.do_loop %[[K_IV:.*]] = {{.*}} { ++! COMMON: %[[K_IV_CONV:.*]] = fir.convert %[[K_IV]] : (index) -> i32 ++! HOST: fir.store %[[K_IV_CONV]] to %[[ORIG_K_DECL]]#1 ++! DEVICE: fir.store %[[K_IV_CONV]] to %[[TARGET_K_DECL]]#1 ++! COMMON: } ++! COMMON: } ++! COMMON: omp.yield ++! COMMON: } ++! COMMON: } ++! COMMON: omp.terminator ++! 
COMMON: } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 llvm-project-aso/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 +--- llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,42 @@ ++! Tests `do concurrent` mapping when mapped value(s) depend on values defined ++! outside the target region; e.g. the size of the array is dynamic. This needs ++! to be handled by localizing these region outsiders by either cloning them in ++! the region or in case we cannot do that, map them and use the mapped values. ++ ++! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=device %s -o - \ ++! RUN: | FileCheck %s ++ ++subroutine foo(n) ++ implicit none ++ integer :: n ++ integer :: i ++ integer, dimension(n) :: a ++ ++ do concurrent(i=1:10) ++ a(i) = i ++ end do ++end subroutine ++ ++! CHECK-DAG: %[[I_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFfooEi"} ++! CHECK-DAG: %[[A_DECL:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFfooEa"} ++! CHECK-DAG: %[[N_ALLOC:.*]] = fir.alloca i32 ++ ++! CHECK-DAG: %[[I_MAP:.*]] = omp.map.info var_ptr(%[[I_DECL]]#1 : {{.*}}) ++! CHECK-DAG: %[[A_MAP:.*]] = omp.map.info var_ptr(%[[A_DECL]]#1 : {{.*}}) ++! CHECK-DAG: %[[N_MAP:.*]] = omp.map.info var_ptr(%[[N_ALLOC]] : {{.*}}) ++ ++! CHECK: omp.target ++! CHECK-SAME: map_entries(%[[I_MAP]] -> %[[I_ARG:arg[0-9]*]], ++! CHECK-SAME: %[[A_MAP]] -> %[[A_ARG:arg[0-9]*]], ++! CHECK-SAME: %[[N_MAP]] -> %[[N_ARG:arg[0-9]*]] : {{.*}}) ++! CHECK-SAME: {{.*}} { ++ ++! CHECK-DAG: %{{.*}} = hlfir.declare %[[I_ARG]] ++! CHECK-DAG: %{{.*}} = hlfir.declare %[[A_ARG]] ++! CHECK-DAG: %{{.*}} = fir.load %[[N_ARG]] ++ ++! CHECK: omp.terminator ++! CHECK: } ++ ++ ++ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 llvm-project-aso/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 +--- llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,63 @@ ++! Tests that if `do concurrent` is indirectly nested in its parent loop, that we ++! skip converting the indirectly nested `do concurrent` loop. ++ ++! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=host %s -o - \ ++! RUN: | FileCheck %s --check-prefixes=HOST,COMMON ++ ++! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=device %s -o - \ ++! RUN: | FileCheck %s --check-prefixes=DEVICE,COMMON ++ ++program main ++ integer, parameter :: n = 10 ++ integer, parameter :: m = 20 ++ integer, parameter :: l = 30 ++ integer x; ++ integer :: a(n, m, l) ++ ++ do concurrent(i=1:n) ++ do j=1,m ++ do concurrent(k=1:l) ++ a(i,j,k) = i * j + k ++ end do ++ end do ++ end do ++end ++ ++! HOST: %[[ORIG_K_ALLOC:.*]] = fir.alloca i32 {bindc_name = "k"} ++! HOST: %[[ORIG_K_DECL:.*]]:2 = hlfir.declare %[[ORIG_K_ALLOC]] ++ ++! HOST: %[[ORIG_J_ALLOC:.*]] = fir.alloca i32 {bindc_name = "j", {{.*}}} ++! HOST: %[[ORIG_J_DECL:.*]]:2 = hlfir.declare %[[ORIG_J_ALLOC]] ++ ++! DEVICE: omp.target {{.*}}map_entries(%{{[^[:space:]]+}} -> %[[I_ARG:[^,]+]], ++! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[J_ARG:[^,]+]], ++! 
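! Illustrative sketch (not part of this patch): the shape of loop that
! skip_all_nested_loops.f90 relies on. Because an ordinary do loop sits between
! the two do concurrent loops, only the outermost loop is converted; the inner
! do concurrent stays a regular fir.do_loop nest. Sizes are hypothetical.
program skip_nested_sketch
  implicit none
  integer, parameter :: n = 10, m = 20, l = 30
  integer :: a(n, m, l)
  integer :: i, j, k

  do concurrent (i = 1:n)      ! candidate for the OpenMP worksharing loop
    do j = 1, m                ! ordinary loop: breaks direct nesting
      do concurrent (k = 1:l)  ! expected to be left unconverted
        a(i, j, k) = i * j + k
      end do
    end do
  end do

  print *, a(1, 1, 1), a(n, m, l)
end program skip_nested_sketch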
DEVICE-SAME: %{{[^[:space:]]+}} -> %[[K_ARG:[^,]+]], ++! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[A_ARG:[^:]+]]: ++! DEVICE-SAME: !fir.ref, !fir.ref, !fir.ref, !fir.ref>) { ++ ++! DEVICE: %[[TARGET_J_DECL:.*]]:2 = hlfir.declare %[[J_ARG]] {uniq_name = "_QFEj"} ++! DEVICE: %[[TARGET_K_DECL:.*]]:2 = hlfir.declare %[[K_ARG]] {uniq_name = "_QFEk"} ++ ++! DEVICE: omp.teams ++ ++! COMMON: omp.parallel { ++ ++! DEVICE: omp.distribute ++ ++! COMMON: omp.wsloop { ++! COMMON: omp.loop_nest ({{[^[:space:]]+}}) {{.*}} { ++! COMMON: fir.do_loop {{.*}} iter_args(%[[J_IV:.*]] = {{.*}}) -> {{.*}} { ++! HOST: fir.store %[[J_IV]] to %[[ORIG_J_DECL]]#1 ++! DEVICE: fir.store %[[J_IV]] to %[[TARGET_J_DECL]]#1 ++ ++! COMMON: fir.do_loop %[[K_IV:.*]] = {{.*}} { ++! COMMON: %[[K_IV_CONV:.*]] = fir.convert %[[K_IV]] : (index) -> i32 ++! HOST: fir.store %[[K_IV_CONV]] to %[[ORIG_K_DECL]]#1 ++! DEVICE: fir.store %[[K_IV_CONV]] to %[[TARGET_K_DECL]]#1 ++! COMMON: } ++! COMMON: } ++! COMMON: omp.yield ++! COMMON: } ++! COMMON: } ++! COMMON: omp.terminator ++! COMMON: } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/tools/bbc/bbc.cpp llvm-project-aso/flang/tools/bbc/bbc.cpp +--- llvm-project-aso-orig/flang/tools/bbc/bbc.cpp 2024-11-23 20:25:26.855275120 -0600 ++++ llvm-project-aso/flang/tools/bbc/bbc.cpp 2024-11-23 20:39:47.192175322 -0600 +@@ -142,6 +142,12 @@ llvm::cl::desc("enable openmp device compilation"), llvm::cl::init(false)); @@ -10068,44 +10517,52 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/tools/bbc/bbc.cpp llvm-pro static llvm::cl::opt enableOpenMPGPU("fopenmp-is-gpu", llvm::cl::desc("enable openmp GPU target codegen"), -@@ -272,7 +278,18 @@ - static mlir::LogicalResult runOpenMPPasses(mlir::ModuleOp mlirModule) { +@@ -163,7 +169,7 @@ + static llvm::cl::opt + setOpenMPVersion("fopenmp-version", + llvm::cl::desc("OpenMP standard version"), +- llvm::cl::init(11)); ++ llvm::cl::init(52)); + + static llvm::cl::opt setOpenMPTargetDebug( + "fopenmp-target-debug", +@@ -287,7 +293,19 @@ + static llvm::LogicalResult runOpenMPPasses(mlir::ModuleOp mlirModule) { mlir::PassManager pm(mlirModule->getName(), mlir::OpPassManager::Nesting::Implicit); - fir::createOpenMPFIRPassPipeline(pm, enableOpenMPDevice); + using DoConcurrentMappingKind = + Fortran::frontend::CodeGenOptions::DoConcurrentMappingKind; + -+ auto doConcurrentMappingKind = ++ fir::OpenMPFIRPassPipelineOpts opts; ++ opts.isTargetDevice = enableOpenMPDevice; ++ opts.doConcurrentMappingKind = + llvm::StringSwitch( + enableDoConcurrentToOpenMPConversion) + .Case("host", DoConcurrentMappingKind::DCMK_Host) + .Case("device", DoConcurrentMappingKind::DCMK_Device) + .Default(DoConcurrentMappingKind::DCMK_None); + -+ fir::createOpenMPFIRPassPipeline(pm, enableOpenMPDevice, -+ doConcurrentMappingKind); ++ fir::createOpenMPFIRPassPipeline(pm, opts); (void)mlir::applyPassManagerCLOptions(pm); if (mlir::failed(pm.run(mlirModule))) { llvm::errs() << "FATAL: failed to correctly apply OpenMP pass pipeline"; -diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h ---- llvm-project.orig/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h 2024-06-12 10:43:13.184204565 -0500 -+++ llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h 2024-06-12 10:44:09.355614196 -0500 -@@ -16,6 +16,7 @@ - - #include "llvm/Analysis/MemorySSAUpdater.h" - #include "llvm/Frontend/OpenMP/OMPConstants.h" -+#include "llvm/Frontend/OpenMP/OMPGridValues.h" - #include 
"llvm/IR/DebugLoc.h" - #include "llvm/IR/IRBuilder.h" - #include "llvm/Support/Allocator.h" -@@ -99,14 +100,18 @@ - /// expanded. - std::optional IsGPU; - -- // Flag for specifying if offloading is mandatory. -+ /// Flag for specifying if offloading is mandatory. - std::optional OpenMPOffloadMandatory; +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/.github/workflows/release-binaries.yml llvm-project-aso/.github/workflows/release-binaries.yml +--- llvm-project-aso-orig/.github/workflows/release-binaries.yml 2024-10-18 17:40:31.952997950 -0500 ++++ llvm-project-aso/.github/workflows/release-binaries.yml 2024-11-23 20:39:47.192175322 -0600 +@@ -328,7 +328,7 @@ + run: | + # Build some of the mlir tools that take a long time to link + if [ "${{ needs.prepare.outputs.build-flang }}" = "true" ]; then +- ninja -C ${{ steps.setup-stage.outputs.build-prefix }}/build/tools/clang/stage2-bins/ -j2 flang bbc ++ ninja -C ${{ steps.setup-stage.outputs.build-prefix }}/build/tools/clang/stage2-bins/ -j2 flang-new bbc + fi + ninja -C ${{ steps.setup-stage.outputs.build-prefix }}/build/tools/clang/stage2-bins/ \ + mlir-bytecode-parser-fuzzer \ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h llvm-project-aso/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +--- llvm-project-aso-orig/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h 2024-10-29 11:07:19.577634859 -0500 ++++ llvm-project-aso/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h 2024-11-23 20:39:47.196175308 -0600 +@@ -110,7 +110,8 @@ /// First separator used between the initial two parts of a name. std::optional FirstSeparator; @@ -10114,83 +10571,88 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/include/llvm/Frontend/OpenM + /// Separator used between all of the rest consecutive parts of a name std::optional Separator; -+ // Grid Value for the GPU target -+ std::optional GridValue; -+ - OpenMPIRBuilderConfig(); - OpenMPIRBuilderConfig(bool IsTargetDevice, bool IsGPU, - bool OpenMPOffloadMandatory, -@@ -132,6 +137,11 @@ - return *OpenMPOffloadMandatory; - } + // Grid Value for the GPU target +@@ -727,13 +728,12 @@ + LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, + const Twine &Name = "loop"); -+ omp::GV getGridValue() const { -+ assert(GridValue.has_value() && "GridValue is not set"); -+ return *GridValue; -+ } +- /// Generator for the control flow structure of an OpenMP canonical loop. ++ /// Calculate the trip count of a canonical loop. + /// +- /// Instead of a logical iteration space, this allows specifying user-defined +- /// loop counter values using increment, upper- and lower bounds. To +- /// disambiguate the terminology when counting downwards, instead of lower +- /// bounds we use \p Start for the loop counter value in the first body +- /// iteration. ++ /// This allows specifying user-defined loop counter values using increment, ++ /// upper- and lower bounds. To disambiguate the terminology when counting ++ /// downwards, instead of lower bounds we use \p Start for the loop counter ++ /// value in the first body iteration. + /// + /// Consider the following limitations: + /// +@@ -757,7 +757,32 @@ + /// + /// for (int i = 0; i < 42; i -= 1u) + /// +- // ++ /// \param Loc The insert and source location description. ++ /// \param Start Value of the loop counter for the first iterations. ++ /// \param Stop Loop counter values past this will stop the loop. ++ /// \param Step Loop counter increment after each iteration; negative ++ /// means counting down. 
++ /// \param IsSigned Whether Start, Stop and Step are signed integers. ++ /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop ++ /// counter. ++ /// \param Name Base name used to derive instruction names. ++ /// ++ /// \returns The value holding the calculated trip count. ++ Value *calculateCanonicalLoopTripCount(const LocationDescription &Loc, ++ Value *Start, Value *Stop, Value *Step, ++ bool IsSigned, bool InclusiveStop, ++ const Twine &Name = "loop"); + - bool hasRequiresFlags() const { return RequiresFlags; } - bool hasRequiresReverseOffload() const; - bool hasRequiresUnifiedAddress() const; -@@ -167,6 +177,7 @@ - void setOpenMPOffloadMandatory(bool Value) { OpenMPOffloadMandatory = Value; } - void setFirstSeparator(StringRef FS) { FirstSeparator = FS; } - void setSeparator(StringRef S) { Separator = S; } -+ void setGridValue(omp::GV G) { GridValue = G; } - - void setHasRequiresReverseOffload(bool Value); - void setHasRequiresUnifiedAddress(bool Value); -@@ -1235,12 +1246,14 @@ - getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, - StringRef ParentName = ""); - -+ /// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used -+ /// to -+ /// store lambdas with capture. - /// Functions used to generate reductions. Such functions take two Values - /// representing LHS and RHS of the reduction, respectively, and a reference - /// to the value that is updated to refer to the reduction result. - using ReductionGenTy = - function_ref; -- - /// Functions used to generate atomic reductions. Such functions take two - /// Values representing pointers to LHS and RHS of the reduction, as well as - /// the element type of these pointers. They are expected to atomically -@@ -1278,6 +1291,22 @@ - AtomicReductionGenTy AtomicReductionGen; - }; - -+ /// \param Loc The location where the reduction was -+ /// encountered. Must be within the associate -+ /// directive and after the last local access to the -+ /// reduction variables. -+ /// \param AllocaIP An insertion point suitable for allocas usable -+ /// in reductions. -+ /// \param ReductionInfos A list of info on each reduction variable. -+ /// \param IsNoWait A flag set if the reduction is marked as nowait. -+ InsertPointTy createReductionsGPU(const LocationDescription &Loc, -+ InsertPointTy AllocaIP, -+ ArrayRef ReductionInfos, -+ ArrayRef IsByRef, -+ bool IsNoWait = false, -+ bool IsTeamsReduction = false, -+ bool HasDistribute = false); -+ - // TODO: provide atomic and non-atomic reduction generators for reduction - // operators defined by the OpenMP specification. - -@@ -1344,7 +1373,9 @@ - InsertPointTy createReductions(const LocationDescription &Loc, - InsertPointTy AllocaIP, - ArrayRef ReductionInfos, -- ArrayRef IsByRef, bool IsNoWait = false); -+ ArrayRef IsByRef, bool IsNoWait = false, -+ bool IsTeamsReduction = false, -+ bool HasDistribute = false); ++ /// Generator for the control flow structure of an OpenMP canonical loop. ++ /// ++ /// Instead of a logical iteration space, this allows specifying user-defined ++ /// loop counter values using increment, upper- and lower bounds. To ++ /// disambiguate the terminology when counting downwards, instead of lower ++ /// bounds we use \p Start for the loop counter value in the first body ++ /// ++ /// It calls \see calculateCanonicalLoopTripCount for trip count calculations, ++ /// so limitations of that method apply here as well. ++ /// + /// \param Loc The insert and source location description. 
+ /// \param BodyGenCB Callback that will generate the loop body code. + /// \param Start Value of the loop counter for the first iterations. +@@ -1878,8 +1903,6 @@ + /// nowait. + /// \param IsTeamsReduction Optional flag set if it is a teams + /// reduction. +- /// \param HasDistribute Optional flag set if it is a +- /// distribute reduction. + /// \param GridValue Optional GPU grid value. + /// \param ReductionBufNum Optional OpenMPCUDAReductionBufNumValue to be + /// used for teams reduction. +@@ -1888,7 +1911,6 @@ + const LocationDescription &Loc, InsertPointTy AllocaIP, + InsertPointTy CodeGenIP, ArrayRef ReductionInfos, + bool IsNoWait = false, bool IsTeamsReduction = false, +- bool HasDistribute = false, + ReductionGenCBKind ReductionGenCBKind = ReductionGenCBKind::MLIR, + std::optional GridValue = {}, unsigned ReductionBufNum = 1024, + Value *SrcLocInfo = nullptr); +@@ -1960,7 +1982,8 @@ + InsertPointTy AllocaIP, + ArrayRef ReductionInfos, + ArrayRef IsByRef, +- bool IsNoWait = false); ++ bool IsNoWait = false, ++ bool IsTeamsReduction = false); ///} -@@ -1627,6 +1658,31 @@ +@@ -2234,6 +2257,31 @@ MapNamesArray(MapNamesArray) {} }; @@ -10199,9 +10661,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/include/llvm/Frontend/OpenM + /// populate associated static structures. + struct TargetKernelDefaultBounds { + int32_t MinTeams = 1; -+ int32_t MaxTeams = -1; ++ SmallVector MaxTeams; + int32_t MinThreads = 1; -+ int32_t MaxThreads = -1; ++ SmallVector MaxThreads; + int32_t ReductionDataSize = 0; + int32_t ReductionBufferLength = 0; + }; @@ -10212,32 +10674,33 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/include/llvm/Frontend/OpenM + /// prior to the call to the kernel launch OpenMP RTL function. + struct TargetKernelRuntimeBounds { + Value *LoopTripCount = nullptr; -+ Value *TargetThreadLimit = nullptr; -+ Value *TeamsThreadLimit = nullptr; ++ SmallVector TargetThreadLimit; ++ SmallVector TeamsThreadLimit; + Value *MinTeams = nullptr; -+ Value *MaxTeams = nullptr; ++ SmallVector MaxTeams; + Value *MaxThreads = nullptr; + }; + /// Data structure that contains the needed information to construct the /// kernel args vector. struct TargetKernelArgs { -@@ -1635,7 +1691,7 @@ +@@ -2242,7 +2290,7 @@ /// Arguments passed to the runtime library TargetDataRTArgs RTArgs; /// The number of iterations -- Value *NumIterations; -+ Value *TripCount; +- Value *NumIterations = nullptr; ++ Value *TripCount = nullptr; /// The number of teams. - Value *NumTeams; + ArrayRef NumTeams; /// The number of threads. -@@ -1647,12 +1703,11 @@ - - /// Constructor for TargetKernelArgs +@@ -2255,13 +2303,12 @@ + // Constructors for TargetKernelArgs. + TargetKernelArgs() {} TargetKernelArgs(unsigned NumTargetItems, TargetDataRTArgs RTArgs, -- Value *NumIterations, Value *NumTeams, Value *NumThreads, -+ Value *TripCount, Value *NumTeams, Value *NumThreads, - Value *DynCGGroupMem, bool HasNoWait) +- Value *NumIterations, ArrayRef NumTeams, ++ Value *TripCount, ArrayRef NumTeams, + ArrayRef NumThreads, Value *DynCGGroupMem, + bool HasNoWait) - : NumTargetItems(NumTargetItems), RTArgs(RTArgs), - NumIterations(NumIterations), NumTeams(NumTeams), - NumThreads(NumThreads), DynCGGroupMem(DynCGGroupMem), @@ -10248,7 +10711,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/include/llvm/Frontend/OpenM }; /// Create the kernel args vector used by emitTargetKernel. 
This function -@@ -1967,6 +2022,14 @@ +@@ -2625,6 +2672,14 @@ Value *NumTeamsLower = nullptr, Value *NumTeamsUpper = nullptr, Value *ThreadLimit = nullptr, Value *IfExpr = nullptr); @@ -10257,13 +10720,13 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/include/llvm/Frontend/OpenM + /// \param Loc The location where the teams construct was encountered. + /// \param AllocaIP The insertion points to be used for alloca instructions. + /// \param BodyGenCB Callback that will generate the region code. -+ InsertPointTy createDistribute(const LocationDescription &Loc, -+ InsertPointTy AllocaIP, -+ BodyGenCallbackTy BodyGenCB); ++ InsertPointOrErrorTy createDistribute(const LocationDescription &Loc, ++ InsertPointTy AllocaIP, ++ BodyGenCallbackTy BodyGenCB); /// Generate conditional branch and relevant BasicBlocks through which private /// threads copy the 'copyin' variables from Master copy to threadprivate /// copies. -@@ -2079,15 +2142,10 @@ +@@ -2737,15 +2792,10 @@ /// /// \param Loc The insert and source location description. /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not. @@ -10283,17 +10746,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/include/llvm/Frontend/OpenM /// Create a runtime call for kmpc_target_deinit /// -@@ -2113,6 +2171,9 @@ - static void writeThreadBoundsForKernel(const Triple &T, Function &Kernel, - int32_t LB, int32_t UB); - -+ /// Write the global variable to indicate which amdgcn ABI to use -+ static void emit__oclc_ABI_version(Module &M, int32_t COV); -+ - /// Read/write a bounds on teams for \p Kernel. Read will return 0 if none - /// is set. - static std::pair readTeamBoundsForKernel(const Triple &T, -@@ -2187,7 +2248,6 @@ +@@ -2846,7 +2896,6 @@ Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName); @@ -10301,54 +10754,51 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/include/llvm/Frontend/OpenM /// Type of BodyGen to use for region codegen /// /// Priv: If device pointer privatization is required, emit the body of the -@@ -2246,21 +2306,23 @@ +@@ -2905,27 +2954,31 @@ /// Generator for '#omp target' /// /// \param Loc where the target data construct was encountered. + /// \param IsSPMD whether this is an SPMD target launch. + /// \param IsOffloadEntry whether it is an offload entry. ++ /// \param IfCond value of the IF clause for the TARGET construct or nullptr. /// \param CodeGenIP The insertion point where the call to the outlined /// function should be emitted. /// \param EntryInfo The entry information about the function. - /// \param NumTeams Number of teams specified in the num_teams clause. - /// \param NumThreads Number of teams specified in the thread_limit clause. -+ /// \param DefaultBounds The default kernel lanuch bounds. -+ /// \param RuntimeBounds The runtime kernel lanuch bounds. - /// \param Inputs The input values to the region that will be passed. - /// as arguments to the outlined function. - /// \param BodyGenCB Callback that will generate the region code. 
- /// \param ArgAccessorFuncCB Callback that will generate accessors - /// instructions for passed in target arguments where neccessary -- InsertPointTy createTarget(const LocationDescription &Loc, -+ InsertPointTy createTarget(const LocationDescription &Loc, bool IsSPMD, - OpenMPIRBuilder::InsertPointTy AllocaIP, - OpenMPIRBuilder::InsertPointTy CodeGenIP, -- TargetRegionEntryInfo &EntryInfo, int32_t NumTeams, -- int32_t NumThreads, -+ TargetRegionEntryInfo &EntryInfo, -+ const TargetKernelDefaultBounds &DefaultBounds, -+ const TargetKernelRuntimeBounds &RuntimeBounds, - SmallVectorImpl &Inputs, - GenMapInfoCallbackTy GenMapInfoCB, - TargetBodyGenCallbackTy BodyGenCB, -diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp ---- llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp 2024-06-12 10:43:13.320203135 -0500 -+++ llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp 2024-06-12 10:44:20.399498178 -0500 -@@ -145,10 +145,22 @@ - } - #endif - -+Function *GLOBAL_ReductionFunc = nullptr; -+ -+static uint64_t getTypeSizeInBytes(Module &M, Type *Type) { -+ return divideCeil(M.getDataLayout().getTypeSizeInBits(Type), 8); -+} -+ -+static Value *getTypeSizeInBytesValue(IRBuilder<> &Builder, Module &M, -+ Type *Type) { -+ return Builder.getInt64(getTypeSizeInBytes(M, Type)); -+} -+ - static const omp::GV &getGridValue(const Triple &T, Function *Kernel) { ++ /// \param DefaultBounds The default kernel launch bounds. ++ /// \param RuntimeBounds The runtime kernel launch bounds. + /// \param Inputs The input values to the region that will be passed. + /// as arguments to the outlined function. + /// \param BodyGenCB Callback that will generate the region code. + /// \param ArgAccessorFuncCB Callback that will generate accessors +- /// instructions for passed in target arguments where neccessary ++ /// instructions for passed in target arguments where necessary. + /// \param Dependencies A vector of DependData objects that carry +- // dependency information as passed in the depend clause ++ // dependency information as passed in the depend clause. + // \param HasNowait Whether the target construct has a `nowait` clause or not. 
+ InsertPointOrErrorTy createTarget( +- const LocationDescription &Loc, bool IsOffloadEntry, +- OpenMPIRBuilder::InsertPointTy AllocaIP, ++ const LocationDescription &Loc, bool IsSPMD, bool IsOffloadEntry, ++ Value *IfCond, OpenMPIRBuilder::InsertPointTy AllocaIP, + OpenMPIRBuilder::InsertPointTy CodeGenIP, +- TargetRegionEntryInfo &EntryInfo, ArrayRef NumTeams, +- ArrayRef NumThreads, SmallVectorImpl &Inputs, +- GenMapInfoCallbackTy GenMapInfoCB, TargetBodyGenCallbackTy BodyGenCB, ++ TargetRegionEntryInfo &EntryInfo, ++ const TargetKernelDefaultBounds &DefaultBounds, ++ const TargetKernelRuntimeBounds &RuntimeBounds, ++ SmallVectorImpl &Inputs, GenMapInfoCallbackTy GenMapInfoCB, ++ TargetBodyGenCallbackTy BodyGenCB, + TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB, + SmallVector Dependencies = {}, bool HasNowait = false); + +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp llvm-project-aso/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +--- llvm-project-aso-orig/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp 2024-11-23 20:25:26.999274603 -0600 ++++ llvm-project-aso/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp 2024-11-23 20:40:49.111951908 -0600 +@@ -153,6 +153,7 @@ if (T.isAMDGPU()) { StringRef Features = Kernel->getFnAttribute("target-features").getValueAsString(); @@ -10356,16 +10806,16 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu if (Features.count("+wavefrontsize64")) return omp::getAMDGPUGridValues<64>(); return omp::getAMDGPUGridValues<32>(); -@@ -365,7 +377,7 @@ +@@ -369,7 +370,7 @@ Builder.restoreIP(OuterAllocaIP); Instruction *FakeVal; AllocaInst *FakeValAddr = - Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, Name + ".addr"); + Builder.CreateAlloca(Builder.getInt32Ty(), 0, nullptr, Name + ".addr"); - ToBeDeleted.push(FakeValAddr); + ToBeDeleted.push_back(FakeValAddr); if (AsPtr) { -@@ -510,7 +522,7 @@ +@@ -525,7 +526,7 @@ KernelArgs.RTArgs.MapTypesArray, KernelArgs.RTArgs.MapNamesArray, KernelArgs.RTArgs.MappersArray, @@ -10374,7 +10824,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu Flags, NumTeams3D, NumThreads3D, -@@ -722,6 +734,8 @@ +@@ -737,6 +738,8 @@ Extractor.excludeArgFromAggregate(V); Function *OutlinedFn = Extractor.extractCodeRegion(CEAC); @@ -10383,7 +10833,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu // Forward target-cpu, target-features attributes to the outlined function. auto TargetCpuAttr = OuterFn->getFnAttribute("target-cpu"); -@@ -1190,7 +1204,7 @@ +@@ -1216,7 +1219,7 @@ static void targetParallelCallback( OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, @@ -10392,7 +10842,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu Value *ThreadID, const SmallVector &ToBeDeleted) { // Add some known attributes. 
IRBuilder<> &Builder = OMPIRBuilder->Builder; -@@ -1207,7 +1221,6 @@ +@@ -1233,7 +1236,6 @@ CallInst *CI = cast(OutlinedFn.user_back()); assert(CI && "Expected call instruction to outlined function"); CI->getParent()->setName("omp_parallel"); @@ -10400,16 +10850,18 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu Builder.SetInsertPoint(CI); Type *PtrTy = OMPIRBuilder->VoidPtr; Value *NullPtrValue = Constant::getNullValue(PtrTy); -@@ -1216,7 +1229,7 @@ +@@ -1241,8 +1243,8 @@ + // Add alloca for kernel args OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP(); Builder.SetInsertPoint(OuterAllocaBB, OuterAllocaBB->getFirstInsertionPt()); - AllocaInst *ArgsAlloca = +- AllocaInst *ArgsAlloca = - Builder.CreateAlloca(ArrayType::get(PtrTy, NumCapturedVars)); -+ Builder.CreateAlloca(ArrayType::get(PtrTy, NumCapturedVars), nullptr, "kernel_arg"); ++ AllocaInst *ArgsAlloca = Builder.CreateAlloca( ++ ArrayType::get(PtrTy, NumCapturedVars), nullptr, "kernel_arg"); Value *Args = ArgsAlloca; // Add address space cast if array for storing arguments is not allocated // in address space 0 -@@ -1278,7 +1291,7 @@ +@@ -1304,7 +1306,7 @@ static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, @@ -10418,7 +10870,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu const SmallVector &ToBeDeleted) { IRBuilder<> &Builder = OMPIRBuilder->Builder; FunctionCallee RTLFn; -@@ -1471,7 +1484,9 @@ +@@ -1498,7 +1500,9 @@ AllocaInst *PrivTIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr.local"); @@ -10429,7 +10881,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu // Add some fake uses for OpenMP provided arguments. 
ToBeDeleted.push_back(Builder.CreateLoad(Int32, TIDAddr, "tid.addr.use")); -@@ -1509,7 +1524,7 @@ +@@ -1537,7 +1541,7 @@ OI.PostOutlineCB = [=, ToBeDeletedVec = std::move(ToBeDeleted)](Function &OutlinedFn) { targetParallelCallback(this, OutlinedFn, OuterFn, OuterAllocaBlock, Ident, @@ -10438,7 +10890,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu ThreadID, ToBeDeletedVec); }; } else { -@@ -1517,7 +1532,7 @@ +@@ -1545,7 +1549,7 @@ OI.PostOutlineCB = [=, ToBeDeletedVec = std::move(ToBeDeleted)](Function &OutlinedFn) { hostParallelCallback(this, OutlinedFn, OuterFn, Ident, IfCondition, @@ -10447,963 +10899,81 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu }; } -@@ -2134,39 +2149,1191 @@ - /*IsCancellable*/ true); - } - -+static Value *getGPUWarpSize(Module &M, OpenMPIRBuilder &OMPBuilder) { -+ return OMPBuilder.Builder.CreateCall( -+ OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_get_warp_size), -+ {}); -+} -+ -+static Value *getGPUThreadID(Module &M, OpenMPIRBuilder &OMPBuilder) { -+ return OMPBuilder.Builder.CreateCall( -+ OMPBuilder.getOrCreateRuntimeFunction( -+ M, OMPRTL___kmpc_get_hardware_thread_id_in_block), -+ {}); -+} -+ -+static Value *getGPUNumThreads(Module &M, OpenMPIRBuilder &OMPBuilder) { -+ const char *LocSize = "__kmpc_get_hardware_num_threads_in_block"; -+ llvm::Function *F = M.getFunction(LocSize); -+ if (!F) { -+ LLVMContext &Ctx = M.getContext(); -+ Type *I32Type = Type::getInt32Ty(Ctx); -+ -+ F = Function::Create( -+ FunctionType::get(I32Type, std::nullopt, false), -+ GlobalVariable::ExternalLinkage, LocSize, M); -+ } -+ return OMPBuilder.Builder.CreateCall(F, std::nullopt, "nvptx_num_threads"); -+} -+ -+static Value *getNVPTXWarpID(Module &M, OpenMPIRBuilder &OMPIRBuilder) { -+ unsigned LaneIDBits = -+ llvm::Log2_32(OMPIRBuilder.Config.getGridValue().GV_Warp_Size); -+ return OMPIRBuilder.Builder.CreateAShr(getGPUThreadID(M, OMPIRBuilder), -+ LaneIDBits, "nvptx_warp_id"); -+} -+ -+static Value *getNVPTXLaneID(Module &M, OpenMPIRBuilder &OMPIRBuilder) { -+ unsigned LaneIDBits = -+ llvm::Log2_32(OMPIRBuilder.Config.getGridValue().GV_Warp_Size); -+ assert(LaneIDBits < 32 && "Invalid LaneIDBits size in NVPTX device."); -+ unsigned LaneIDMask = ~0u >> (32u - LaneIDBits); -+ return OMPIRBuilder.Builder.CreateAnd( -+ getGPUThreadID(M, OMPIRBuilder), -+ OMPIRBuilder.Builder.getInt32(LaneIDMask), "nvptx_lane_id"); -+} -+ -+namespace { -+enum CopyAction : unsigned { -+ // RemoteLaneToThread: Copy over a Reduce list from a remote lane in -+ // the warp using shuffle instructions. -+ RemoteLaneToThread, -+ // ThreadCopy: Make a copy of a Reduce list on the thread's stack. 
-+ ThreadCopy, -+}; -+} // namespace -+ -+struct CopyOptionsTy { -+ llvm::Value *RemoteLaneOffset; -+ llvm::Value *ScratchpadIndex; -+ llvm::Value *ScratchpadWidth; -+}; -+ -+static Value *castValueToType(Module &M, OpenMPIRBuilder &OMPBuilder, -+ Value *From, Type *ToType, -+ OpenMPIRBuilder::InsertPointTy AllocaIP, -+ const OpenMPIRBuilder::LocationDescription &Loc) { -+ IRBuilder<> &Builder = OMPBuilder.Builder; -+ Type *FromType = From->getType(); -+ uint64_t FromSize = -+ divideCeil(M.getDataLayout().getTypeSizeInBits(FromType), 8); -+ uint64_t ToSize = -+ divideCeil(M.getDataLayout().getTypeSizeInBits(ToType), 8); -+ assert(FromSize > 0 && "From size must be greater than zero"); -+ assert(ToSize > 0 && "From size must be greater than zero"); -+ if(FromType == ToType) -+ return From; -+ if(FromSize == ToSize) -+ return Builder.CreateBitCast(From, ToType); -+ if (ToType->isIntegerTy() && FromType->isIntegerTy()) -+ // FIXME(JAN): Assuming signed integer here, not sure how to find out -+ // if unsigned -+ return Builder.CreateIntCast(From, ToType, /*isSigned*/true); -+ OpenMPIRBuilder::InsertPointTy CurIP = Builder.saveIP(); -+ Builder.restoreIP(AllocaIP); -+ Value *CastItem = Builder.CreateAlloca(ToType, nullptr, "cast_tmp"); -+ Builder.restoreIP(CurIP); -+ -+ Value *ValCastItem = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ CastItem, FromType->getPointerTo(), "valcastitem"); -+ Builder.CreateStore(From, ValCastItem); -+ return Builder.CreateLoad(ToType, CastItem, "castitemload"); -+} -+ -+static Value * -+createRuntimeShuffleFunction(Module &M, OpenMPIRBuilder &OMPBuilder, -+ const OpenMPIRBuilder::LocationDescription &Loc, -+ OpenMPIRBuilder::InsertPointTy AllocaIP, -+ Value *Element, Type *ElementType, Value *Offset) { -+ LLVMContext &Ctx = M.getContext(); -+ IRBuilder<> &Builder = OMPBuilder.Builder; -+ uint64_t Size = -+ divideCeil(M.getDataLayout().getTypeSizeInBits(ElementType), 8); -+ assert(Size <= 8 && "Unsupported bitwidth in shuffle instruction"); -+ Function *ShuffleFunc = OMPBuilder.getOrCreateRuntimeFunctionPtr( -+ Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32 -+ : RuntimeFunction::OMPRTL___kmpc_shuffle_int64); -+ Type *IntType = Builder.getIntNTy(Size <= 4 ? 
32 : 64); -+ Value *ElemCast = Builder.CreateCast(Instruction::SExt, Element, IntType); -+ Value *WarpSize = getGPUWarpSize(M, OMPBuilder); -+ Value *WarpSizeCast = -+ Builder.CreateIntCast(WarpSize, Type::getInt16Ty(Ctx), /*isSigned=*/true); -+ Value *ShuffleCall = -+ Builder.CreateCall(ShuffleFunc, {ElemCast, Offset, WarpSizeCast}); -+ return castValueToType(M, OMPBuilder, ShuffleCall, IntType, AllocaIP, Loc); -+} -+ -+static void shuffleAndStore(Value *SrcAddr, Value *DstAddr, Type *ElementType, -+ llvm::Value *Offset, Type* ReductionArrayTy, -+ const OpenMPIRBuilder::LocationDescription &Loc, -+ Module &M, -+ OpenMPIRBuilder &OMPBuilder, -+ OpenMPIRBuilder::InsertPointTy AllocaIP) { -+ LLVMContext &Ctx = M.getContext(); -+ IRBuilder<> &Builder = OMPBuilder.Builder; -+ uint64_t Size = -+ divideCeil(M.getDataLayout().getTypeSizeInBits(ElementType), 8); -+ Type *PtrTy = PointerType::getUnqual(Ctx); -+ Value *ElemPtr = DstAddr; -+ Value *Ptr = SrcAddr; -+ // Value *PtrEnd = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ //Builder.CreateConstGEP1_64(ReductionArrayTy, SrcAddr, 1), PtrTy); -+ for (int IntSize = 8; IntSize >= 1; IntSize /= 2) { -+ if(Size < IntSize) -+ continue; -+ // FIXME(JAN): Check if there is a function to convert from bytes to bits -+ Type *IntTy = Builder.getIntNTy(IntSize*8); -+ Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ Ptr, IntTy->getPointerTo(), "ptrcast"); -+ ElemPtr = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ ElemPtr, IntTy->getPointerTo(), "elemptrcast"); -+ -+ // FIXME(JAN): Implement loop to handle larger size -+ assert(((Size / IntSize) <= 1) && "Unsupported IntSize"); -+ Value *Val = Builder.CreateLoad(IntTy, Ptr); -+ Value *Res = createRuntimeShuffleFunction(M, OMPBuilder, Loc, AllocaIP, Val, -+ IntTy, Offset); -+ Builder.CreateStore(Res, ElemPtr); -+ Ptr = Builder.CreateConstGEP1_64(ReductionArrayTy, Ptr, 1, "ptrgep"); -+ ElemPtr = -+ Builder.CreateConstGEP1_64(ReductionArrayTy, ElemPtr, 1, "elemptrgep"); -+ Size = Size % IntSize; -+ } -+} -+ -+static void -+emitReductionListCopy(CopyAction Action, Type *ReductionArrayTy, -+ ArrayRef ReductionInfos, -+ Value *SrcBase, Value *DestBase, -+ Module &M, OpenMPIRBuilder &OMPBuilder, -+ const OpenMPIRBuilder::LocationDescription &Loc, -+ OpenMPIRBuilder::InsertPointTy AllocaIP, -+ CopyOptionsTy CopyOptions = {nullptr, nullptr, nullptr}) { -+ LLVMContext &Ctx = M.getContext(); -+ IRBuilder<> &Builder = OMPBuilder.Builder; -+ Type *PtrTy = PointerType::getUnqual(Ctx); -+ -+ Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset; -+ -+ for (auto En : enumerate(ReductionInfos)) { -+ const OpenMPIRBuilder::ReductionInfo &RI = En.value(); -+ Value *SrcElementAddr = nullptr; -+ Value *DestElementAddr = nullptr; -+ Value *DestElementPtrAddr = nullptr; -+ bool ShuffleInElement = false; -+ bool UpdateDestListPtr = false; -+ -+ // Step 1.1: Get the address for the src element in the Reduce list. -+ Value *SrcElementPtrAddr = Builder.CreateConstGEP2_64( -+ ReductionArrayTy, SrcBase, 0, En.index(), "srcelementptraddr"); -+ SrcElementAddr = -+ Builder.CreateLoad(PtrTy, SrcElementPtrAddr, "srcelementaddr"); -+ -+ // Step 1.2: Create a temporary to store the element in the destination -+ // Reduce list. 
-+ DestElementPtrAddr = Builder.CreateInBoundsGEP( -+ ReductionArrayTy, DestBase, -+ {Builder.getInt64(0), Builder.getInt64(En.index())}, -+ "destelementptraddr"); -+ switch (Action) { -+ case RemoteLaneToThread: { -+ OpenMPIRBuilder::InsertPointTy CurIP = Builder.saveIP(); -+ Builder.restoreIP(AllocaIP); -+ DestElementAddr = Builder.CreateAlloca(RI.ElementType, nullptr, -+ ".omp.reduction.element"); -+ Builder.restoreIP(CurIP); -+ ShuffleInElement = true; -+ UpdateDestListPtr = true; -+ break; -+ } -+ case ThreadCopy: { -+ DestElementAddr = -+ Builder.CreateLoad(PtrTy, DestElementPtrAddr, "destelementaddr"); -+ break; -+ } -+ } -+ -+ // FIXME(JAN): Original code in clanguses .withElementType(...) -+ // check if this generates any code -+ -+ if (ShuffleInElement) { -+ shuffleAndStore(SrcElementAddr, DestElementAddr, -+ RI.ElementType, RemoteLaneOffset, -+ ReductionArrayTy, Loc, M, -+ OMPBuilder, AllocaIP); -+ } else { -+ // FIXME(JAN): Assume Scalar here (TEK_Scalar in Clang) -+ Value *Elem = Builder.CreateLoad(RI.ElementType, SrcElementAddr); -+ Builder.CreateStore(Elem, DestElementAddr); -+ } -+ // Step 3.1: Modify reference in dest Reduce list as needed. -+ // Modifying the reference in Reduce list to point to the newly -+ // created element. The element is live in the current function -+ // scope and that of functions it invokes (i.e., reduce_function). -+ // RemoteReduceData[i] = (void*)&RemoteElem -+ if (UpdateDestListPtr) { -+ Value *CastDestAddr = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ DestElementAddr, PtrTy, "castdestaddr"); -+ Builder.CreateStore(CastDestAddr, DestElementPtrAddr); -+ } -+ } -+} -+ -+static OpenMPIRBuilder::InsertPointTy getIPAfterInstr(Instruction *I) { -+ BasicBlock::iterator it(I); -+ it++; -+ return OpenMPIRBuilder::InsertPointTy(I->getParent(), it); -+} -+ -+ -+static Function *emitShuffleAndReduceFunction( -+ Module &M, const OpenMPIRBuilder::LocationDescription &Loc, -+ ArrayRef ReductionInfos, -+ Function* ReduceFn, -+ OpenMPIRBuilder &OMPBuilder) { -+ IRBuilder<> &Builder = OMPBuilder.Builder; -+ -+ LLVMContext &Ctx = M.getContext(); -+ Type *VoidTy = Type::getVoidTy(Ctx); -+ Type *PtrTy = PointerType::getUnqual(Ctx); -+ Type *I16Type = Type::getInt16Ty(Ctx); -+ auto FuncTy = FunctionType::get( -+ VoidTy, {PtrTy, I16Type, I16Type, I16Type}, /* IsVarArg */ false); -+ Function *SarFunc = -+ Function::Create(FuncTy, GlobalVariable::InternalLinkage, -+ "_omp_reduction_shuffle_and_reduce_func", &M); -+ SarFunc->setDoesNotRecurse(); -+ -+ // Set arg names -+ Argument *Arg0 = SarFunc->getArg(0); -+ Argument *Arg1 = SarFunc->getArg(1); -+ Argument *Arg2 = SarFunc->getArg(2); -+ Argument *Arg3 = SarFunc->getArg(3); -+ Arg0->setName("reduce_list_arg"); -+ Arg1->setName("lane_id_arg"); -+ Arg2->setName("remote_lane_offset_arg"); -+ Arg3->setName("algo_ver_arg"); -+ -+ BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "", SarFunc); -+ Builder.SetInsertPoint(EntryBlock); -+ -+ Type *Arg0Type = Arg0->getType(); -+ Type *ArgNType = Arg1->getType(); -+ Type *ArgNPtrType = Arg1->getType()->getPointerTo(); -+ Value *ReduceListAlloca = -+ Builder.CreateAlloca(Arg0Type, nullptr, Arg0->getName() + ".addr"); -+ Value *LaneIdAlloca = -+ Builder.CreateAlloca(ArgNType, nullptr, Arg1->getName() + ".addr"); -+ Value *RemoteLaneOffsetAlloca = -+ Builder.CreateAlloca(ArgNType, nullptr, Arg2->getName() + ".addr"); -+ Value *AlgoVerAlloca = -+ Builder.CreateAlloca(ArgNType, nullptr, Arg3->getName() + ".addr"); -+ // FIXME(Jan): Compute reduction list array type -+ auto 
*RedListArrayTy = ArrayType::get(PtrTy, 1); -+ Instruction *RemoteReductionListAlloca = Builder.CreateAlloca( -+ RedListArrayTy, nullptr, ".omp.reduction.remote_reduce_list"); -+ -+ Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ ReduceListAlloca, Arg0Type, ReduceListAlloca->getName() + ".acast"); -+ Value *LaneIdAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ LaneIdAlloca, ArgNPtrType, LaneIdAlloca->getName() + ".acast"); -+ Value *RemoteLaneOffsetAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ RemoteLaneOffsetAlloca, ArgNPtrType, -+ RemoteLaneOffsetAlloca->getName() + ".acast"); -+ Value *AlgoVerAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ AlgoVerAlloca, ArgNPtrType, AlgoVerAlloca->getName() + ".acast"); -+ Value *RemoteListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ RemoteReductionListAlloca, PtrTy, -+ RemoteReductionListAlloca->getName() + ".acast"); -+ -+ Builder.CreateStore(Arg0, ReduceListAddrCast); -+ Builder.CreateStore(Arg1, LaneIdAddrCast); -+ Builder.CreateStore(Arg2, RemoteLaneOffsetAddrCast); -+ Builder.CreateStore(Arg3, AlgoVerAddrCast); -+ -+ Value *ReduceList = -+ Builder.CreateLoad(Arg0Type, ReduceListAddrCast, "reduce_list"); -+ Value *LaneId = Builder.CreateLoad(ArgNType, LaneIdAddrCast, "lane_id"); -+ Value *RemoteLaneOffset = Builder.CreateLoad( -+ ArgNType, RemoteLaneOffsetAddrCast, "remote_lane_offset"); -+ Value *AlgoVer = Builder.CreateLoad(ArgNType, AlgoVerAddrCast, "algo_ver"); -+ -+ OpenMPIRBuilder::InsertPointTy AllocaIP = -+ getIPAfterInstr(RemoteReductionListAlloca); -+ emitReductionListCopy(RemoteLaneToThread, RedListArrayTy, ReductionInfos, -+ ReduceList, RemoteListAddrCast, M, OMPBuilder, -+ Loc, AllocaIP, {RemoteLaneOffset, nullptr, nullptr}); -+ -+ // The actions to be performed on the Remote Reduce list is dependent -+ // on the algorithm version. -+ // -+ // if (AlgoVer==0) || (AlgoVer==1 && (LaneId < Offset)) || (AlgoVer==2 && -+ // LaneId % 2 == 0 && Offset > 0): -+ // do the reduction value aggregation -+ // -+ // The thread local variable Reduce list is mutated in place to host the -+ // reduced data, which is the aggregated value produced from local and -+ // remote lanes. -+ // -+ // Note that AlgoVer is expected to be a constant integer known at compile -+ // time. -+ // When AlgoVer==0, the first conjunction evaluates to true, making -+ // the entire predicate true during compile time. -+ // When AlgoVer==1, the second conjunction has only the second part to be -+ // evaluated during runtime. Other conjunctions evaluates to false -+ // during compile time. -+ // When AlgoVer==2, the third conjunction has only the second part to be -+ // evaluated during runtime. Other conjunctions evaluates to false -+ // during compile time. 
-+ Value *CondAlgo0 = Builder.CreateIsNull(AlgoVer); -+ Value *Algo1 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(1)); -+ Value *LaneComp = Builder.CreateICmpULT(LaneId, RemoteLaneOffset); -+ Value *CondAlgo1 = Builder.CreateAnd(Algo1, LaneComp); -+ Value *Algo2 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(2)); -+ Value *LaneIdAnd1 = Builder.CreateAnd(LaneId, Builder.getInt16(1)); -+ Value *LaneIdComp = Builder.CreateIsNull(LaneIdAnd1); -+ Value *Algo2AndLaneIdComp = Builder.CreateAnd(Algo2, LaneIdComp); -+ Value *RemoteOffsetComp = -+ Builder.CreateICmpSGT(RemoteLaneOffset, Builder.getInt16(0)); -+ Value *CondAlgo2 = Builder.CreateAnd(Algo2AndLaneIdComp, RemoteOffsetComp); -+ Value *CA0OrCA1 = Builder.CreateOr(CondAlgo0, CondAlgo1); -+ Value *CondReduce = Builder.CreateOr(CA0OrCA1, CondAlgo2); -+ -+ -+ BasicBlock *ThenBB = BasicBlock::Create(Ctx, "then", SarFunc); -+ BasicBlock *ElseBB = BasicBlock::Create(Ctx, "else", SarFunc); -+ BasicBlock *MergeBB = BasicBlock::Create(Ctx, "ifcont", SarFunc); -+ -+ Builder.CreateCondBr(CondReduce, ThenBB, ElseBB); -+ Builder.SetInsertPoint(ThenBB); -+ // reduce_function(LocalReduceList, RemoteReduceList) -+ Value *LocalReduceListPtr = -+ Builder.CreatePointerBitCastOrAddrSpaceCast(ReduceList, PtrTy); -+ Value *RemoteReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ RemoteListAddrCast, PtrTy); -+ Builder.CreateCall(ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr}); -+ Builder.CreateBr(MergeBB); -+ Builder.SetInsertPoint(ElseBB); -+ Builder.CreateBr(MergeBB); -+ Builder.SetInsertPoint(MergeBB); -+ -+ Value *Algo1_2 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(1)); -+ Value *LaneIdGtOffset = Builder.CreateICmpUGE(LaneId, RemoteLaneOffset); -+ Value *CondCopy = Builder.CreateAnd(Algo1_2, LaneIdGtOffset); -+ -+ BasicBlock *CpyThenBB = BasicBlock::Create(Ctx, "cpy_then", SarFunc); -+ BasicBlock *CpyElseBB = BasicBlock::Create(Ctx, "cpy_else", SarFunc); -+ BasicBlock *CpyMergeBB = BasicBlock::Create(Ctx, "cpy_ifcont", SarFunc); -+ -+ Builder.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB); -+ -+ Builder.SetInsertPoint(CpyThenBB); -+ emitReductionListCopy(ThreadCopy, RedListArrayTy, ReductionInfos, -+ RemoteListAddrCast, ReduceList, M, OMPBuilder, -+ Loc, AllocaIP); -+ Builder.CreateBr(CpyMergeBB); -+ Builder.SetInsertPoint(CpyElseBB); -+ Builder.CreateBr(CpyMergeBB); -+ Builder.SetInsertPoint(CpyMergeBB); -+ Builder.CreateRetVoid(); -+ -+ return SarFunc; -+} -+ -+static Function *emitInterWarpCopyFunction( -+ Module &M, const OpenMPIRBuilder::LocationDescription &Loc, -+ ArrayRef ReductionInfos, -+ OpenMPIRBuilder &OMPBuilder) { -+ IRBuilder<> &Builder = OMPBuilder.Builder; -+ OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP(); -+ LLVMContext &Ctx = M.getContext(); -+ Type *VoidTy = Type::getVoidTy(Ctx); -+ Type *PtrTy = PointerType::getUnqual(Ctx); -+ Type *I32Type = Type::getInt32Ty(Ctx); -+ auto FuncTy = -+ FunctionType::get(VoidTy, {PtrTy, I32Type}, /* IsVarArg */ false); -+ Function *WcFunc = -+ Function::Create(FuncTy, GlobalVariable::InternalLinkage, -+ "_omp_reduction_inter_warp_copy_func", &M); -+ WcFunc->setDoesNotRecurse(); -+ -+ // Set arg names -+ Argument *Arg0 = WcFunc->getArg(0); -+ Argument *Arg1 = WcFunc->getArg(1); -+ Arg0->setName("reduce_list"); -+ Arg1->setName("num_warps"); -+ -+ // Ensure data transfer storage -+ unsigned WarpSize = OMPBuilder.Config.getGridValue().GV_Warp_Size; -+ // FIXME(Jan): Not sure about the array type here, but it is I32 in Clang -+ auto *ArrayTy = ArrayType::get(I32Type, 
WarpSize); -+ StringRef TransferMediumName = -+ "__openmp_nvptx_data_transfer_temporary_storage"; -+ GlobalVariable *TransferMedium = M.getGlobalVariable(TransferMediumName); -+ if (!TransferMedium) { -+ unsigned SharedAddressSpace = -+ 3; /* FIXME(Jan): C.getTargetAddressSpace(LangAS::cuda_shared); */ -+ TransferMedium = new GlobalVariable( -+ M, ArrayTy, /*isConstant=*/false, GlobalVariable::WeakAnyLinkage, -+ UndefValue::get(ArrayTy), TransferMediumName, -+ /*InsertBefore=*/nullptr, GlobalVariable::NotThreadLocal, -+ SharedAddressSpace); -+ } -+ -+ BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "", WcFunc); -+ Builder.SetInsertPoint(EntryBlock); -+ -+ Type *Arg0Type = Arg0->getType(); -+ Type *Arg1Type = Arg1->getType(); -+ Value *ReduceListAlloca = -+ Builder.CreateAlloca(Arg0Type, nullptr, Arg0->getName() + ".addr"); -+ Instruction *NumWarpsAlloca = -+ Builder.CreateAlloca(Arg1Type, nullptr, Arg1->getName() + ".addr"); -+ Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ ReduceListAlloca, Arg0Type, ReduceListAlloca->getName() + ".acast"); -+ Value *NumWarpsAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ NumWarpsAlloca, Arg1Type->getPointerTo(), -+ NumWarpsAlloca->getName() + ".acast"); -+ Builder.CreateStore(Arg0, ReduceListAddrCast); -+ Builder.CreateStore(Arg1, NumWarpsAddrCast); -+ -+ // Get GPU Info -+ Value *ThreadID = getGPUThreadID(M, OMPBuilder); -+ Value *LaneID = getNVPTXLaneID(M, OMPBuilder); -+ Value *WarpID = getNVPTXWarpID(M, OMPBuilder); -+ -+ Value *ReduceListArg = -+ Builder.CreateLoad(PtrTy, ReduceListAddrCast, "reduce_list_arg"); -+ -+ for (auto En : enumerate(ReductionInfos)) { -+ const OpenMPIRBuilder::ReductionInfo &RI = En.value(); -+ Type *ElementTy = RI.ElementType; -+ unsigned NumTypeBits = M.getDataLayout().getTypeSizeInBits(ElementTy); -+ unsigned RealTySize = divideCeil(NumTypeBits, 8); -+ for (unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) { -+ unsigned NumIters = RealTySize/TySize; -+ if (NumIters == 0) -+ continue; -+ // Type *CopyTy = Builder.getIntNTy(TySize); -+ Type *Int32Ty = Builder.getInt32Ty(); -+ Value *Cnt = nullptr; -+ Value *CntAddrAcast = nullptr; -+ BasicBlock *PrecondBB = nullptr; -+ BasicBlock *ExitBB = nullptr; -+ -+ if (NumIters > 1) { -+ OpenMPIRBuilder::InsertPointTy CurrIP = Builder.saveIP(); -+ Builder.SetInsertPoint(NumWarpsAlloca); -+ Value *CntAddr = Builder.CreateAlloca(Int32Ty, nullptr, ".cnt.addr"); -+ Builder.restoreIP(CurrIP); -+ CntAddrAcast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ CntAddr, PtrTy, CntAddr->getName() + ".acast"); -+ Builder.CreateStore(Constant::getNullValue(Int32Ty), CntAddrAcast); -+ PrecondBB = BasicBlock::Create(Ctx, "precond", WcFunc); -+ ExitBB = BasicBlock::Create(Ctx, "exit", WcFunc); -+ BasicBlock *BodyBB = BasicBlock::Create(Ctx, "body", WcFunc); -+ Builder.CreateBr(PrecondBB); -+ Builder.SetInsertPoint(PrecondBB); -+ Cnt = Builder.CreateLoad(Int32Ty, CntAddrAcast, "cnt"); -+ Value *Cmp = Builder.CreateICmpULT(Cnt, Builder.getInt32(NumIters)); -+ Builder.CreateCondBr(Cmp, BodyBB, ExitBB); -+ Builder.SetInsertPoint(BodyBB); -+ } -+ -+ OMPBuilder.createBarrier( -+ OpenMPIRBuilder::LocationDescription(Builder.saveIP(), Loc.DL), -+ omp::Directive::OMPD_unknown, -+ /* ForceSimpleCall */ false, -+ /* CheckCancelFlag */ true); -+ BasicBlock *ThenBB = BasicBlock::Create(Ctx, "then", WcFunc); -+ BasicBlock *ElseBB = BasicBlock::Create(Ctx, "else", WcFunc); -+ BasicBlock *MergeBB = BasicBlock::Create(Ctx, "ifcont", WcFunc); -+ -+ // if 
(lane_id == 0) -+ Value *IsWarpMaster = Builder.CreateIsNull(LaneID, "warp_master"); -+ Builder.CreateCondBr(IsWarpMaster, ThenBB, ElseBB); -+ -+ // then -+ // Reduce element = LocalReduceList[i] -+ Builder.SetInsertPoint(ThenBB); -+ // FIXME(JAN): Should array type be passed in? -+ auto *RedListArrayTy = ArrayType::get(PtrTy, 1); -+ // FIXME(JAN): maybe it should be 0,0 and not use En.index() -+ Value *ReduceListElementPtrPtr = Builder.CreateConstInBoundsGEP2_64( -+ RedListArrayTy, ReduceListArg, 0, En.index()); -+ Value *ReduceListElementPtr = Builder.CreateLoad( -+ PtrTy, ReduceListElementPtrPtr, "reduce_list_element_ptr"); -+ if (NumIters > 1) -+ ReduceListElementPtr = Builder.CreateGEP(Int32Ty, ReduceListElementPtr, Cnt); -+ -+ Value *TransferElemAddr = Builder.CreateInBoundsGEP( -+ ArrayTy, TransferMedium, {Builder.getInt64(0), WarpID}); -+ Value *ReduceListElement = Builder.CreateLoad( -+ I32Type, ReduceListElementPtr, "reduce_list_element"); -+ Builder.CreateStore(ReduceListElement, TransferElemAddr, -+ /*IsVolatile*/ true); -+ Builder.CreateBr(MergeBB); -+ -+ // else -+ Builder.SetInsertPoint(ElseBB); -+ Builder.CreateBr(MergeBB); -+ -+ // endif -+ Builder.SetInsertPoint(MergeBB); -+ OMPBuilder.createBarrier( -+ OpenMPIRBuilder::LocationDescription(Builder.saveIP(), Loc.DL), -+ omp::Directive::OMPD_unknown, -+ /* ForceSimpleCall */ false, -+ /* CheckCancelFlag */ true); -+ -+ // Warp 0 copies reduce element from transfer medium -+ BasicBlock *W0ThenBB = BasicBlock::Create(Ctx, "w0then", WcFunc); -+ BasicBlock *W0ElseBB = BasicBlock::Create(Ctx, "w0else", WcFunc); -+ BasicBlock *W0MergeBB = BasicBlock::Create(Ctx, "w0ifcont", WcFunc); -+ -+ Value *NumWarpsVal = -+ Builder.CreateLoad(I32Type, NumWarpsAddrCast, "num_warps"); -+ Value *IsActiveThread = -+ Builder.CreateICmpULT(ThreadID, NumWarpsVal, "is_active_thread"); -+ Builder.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB); -+ -+ // W0then -+ // SecMEdiumPtr = &medium[tid] -+ Builder.SetInsertPoint(W0ThenBB); -+ Value *SrcMediumPtrVal = Builder.CreateInBoundsGEP( -+ ArrayTy, TransferMedium, {Builder.getInt64(0), ThreadID}); -+ // SrcMediumVal = *SrcMediumPtr -+ // TODO(JAN): Bitcast here, but no load? skipping for now -+ Value *TargetElementPtrPtr = Builder.CreateConstInBoundsGEP2_64( -+ RedListArrayTy, ReduceListArg, 0, En.index()); -+ Value *TargetElementPtr = Builder.CreateLoad(PtrTy, TargetElementPtrPtr); -+ if (NumIters > 1) -+ TargetElementPtr = Builder.CreateGEP(Int32Ty, TargetElementPtr, Cnt); -+ -+ Value *SrcMediumValue = -+ Builder.CreateLoad(I32Type, SrcMediumPtrVal, /*IsVolatile*/ true); -+ Builder.CreateStore(SrcMediumValue, TargetElementPtr); -+ Builder.CreateBr(W0MergeBB); -+ -+ // W0else -+ Builder.SetInsertPoint(W0ElseBB); -+ Builder.CreateBr(W0MergeBB); -+ -+ // W0endif -+ Builder.SetInsertPoint(W0MergeBB); -+ if (NumIters > 1) { -+ Cnt = Builder.CreateNSWAdd(Cnt, Builder.getInt32(1)); -+ Builder.CreateStore(Cnt, CntAddrAcast); -+ Builder.CreateBr(PrecondBB); -+ Builder.SetInsertPoint(ExitBB); -+ } -+ } -+ } -+ -+ Builder.CreateRetVoid(); -+ Builder.restoreIP(OldIP); -+ return WcFunc; -+} -+ -+/// This function emits a helper that copies all the reduction variables from -+/// the team into the provided global buffer for the reduction variables. 
-+/// -+/// void list_to_global_copy_func(void *buffer, int Idx, void *reduce_data) -+/// For all data entries D in reduce_data: -+/// Copy local D to buffer.D[Idx] -+static Function *emitListToGlobalCopyFunction( -+ Module &M, const OpenMPIRBuilder::LocationDescription &Loc, -+ ArrayRef ReductionInfos, -+ OpenMPIRBuilder &OMPBuilder) { -+ IRBuilder<> &Builder = OMPBuilder.Builder; -+ OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP(); -+ LLVMContext &Ctx = M.getContext(); -+ Type *VoidTy = Type::getVoidTy(Ctx); -+ Type *Int32Ty = Builder.getInt32Ty(); -+ Type *PtrTy = PointerType::getUnqual(Ctx); -+ auto FuncTy = -+ FunctionType::get(VoidTy, {PtrTy, Int32Ty, PtrTy}, /* IsVarArg */ false); -+ Function *LtGCFunc = -+ Function::Create(FuncTy, GlobalVariable::InternalLinkage, -+ "_omp_reduction_list_to_global_copy_func", &M); -+ LtGCFunc->setDoesNotRecurse(); -+ -+ BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "", LtGCFunc); -+ Builder.SetInsertPoint(EntryBlock); -+ -+ // Set arg names -+ Argument *Arg0 = LtGCFunc->getArg(0); -+ Argument *Arg1 = LtGCFunc->getArg(1); -+ Argument *Arg2 = LtGCFunc->getArg(2); -+ Arg0->setName("buffer_arg"); -+ Arg1->setName("idx_arg"); -+ Arg2->setName("reduce_list_arg"); -+ -+ Value *BufferArgAlloca = -+ Builder.CreateAlloca(PtrTy, nullptr, Arg0->getName() + ".addr"); -+ Value *IdxArgAlloca = -+ Builder.CreateAlloca(Int32Ty, nullptr, Arg1->getName() + ".addr"); -+ Value *ReduceListArgAlloca = -+ Builder.CreateAlloca(PtrTy, nullptr, Arg2->getName() + ".addr"); -+ Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ BufferArgAlloca, PtrTy, BufferArgAlloca->getName() + ".acast"); -+ Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ IdxArgAlloca, PtrTy, IdxArgAlloca->getName() + ".acast"); -+ Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ ReduceListArgAlloca, PtrTy, ReduceListArgAlloca->getName() + ".acast"); -+ // FIXME(JAN): Assume a single globalized variable for now, this should be -+ // passed in -+ Type *SingleReductionTy = ReductionInfos.begin()->ElementType; -+ Type *TypeArgs[] = {SingleReductionTy}; -+ StructType *ReductionsBufferTy = -+ StructType::create(Ctx, TypeArgs, "_globalized_locals_ty"); -+ -+ Builder.CreateStore(Arg0, BufferArgAddrCast); -+ Builder.CreateStore(Arg1, IdxArgAddrCast); -+ Builder.CreateStore(Arg2, ReduceListArgAddrCast); -+ -+ Value *BufferArg = Builder.CreateLoad(PtrTy, BufferArgAddrCast, "buffer"); -+ Value *Idxs[] = { -+ Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast, "idxs")}; -+ Value *ReduceListArg = -+ Builder.CreateLoad(PtrTy, ReduceListArgAddrCast, "reduce_list"); -+ // FIXME(Jan): Assume TEK_SCALAR -+ for (auto En : enumerate(ReductionInfos)) { -+ const OpenMPIRBuilder::ReductionInfo &RI = En.value(); -+ // FIXME(Jan): Compute array type -+ auto *RedListArrayTy = ArrayType::get(PtrTy, 1); -+ Value *TargetElementPtrPtr = Builder.CreateConstInBoundsGEP2_64( -+ RedListArrayTy, ReduceListArg, 0, En.index()); -+ Value *TargetElementPtr = Builder.CreateLoad(PtrTy, TargetElementPtrPtr); -+ -+ Value *BufferVD = -+ Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArg, Idxs); -+ Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32( -+ ReductionsBufferTy, BufferVD, 0, En.index()); -+ Value *TargetElement = Builder.CreateLoad(RI.ElementType, TargetElementPtr); -+ Builder.CreateStore(TargetElement, GlobValPtr); -+ } -+ -+ Builder.CreateRetVoid(); -+ Builder.restoreIP(OldIP); -+ return LtGCFunc; -+} -+ -+/// This function emits a helper 
that copies all the reduction variables from -+/// the team into the provided global buffer for the reduction variables. -+/// -+/// void list_to_global_copy_func(void *buffer, int Idx, void *reduce_data) -+/// For all data entries D in reduce_data: -+/// Copy local D to buffer.D[Idx] -+static Function *emitGlobalToListCopyFunction( -+ Module &M, const OpenMPIRBuilder::LocationDescription &Loc, -+ ArrayRef ReductionInfos, -+ OpenMPIRBuilder &OMPBuilder) { -+ IRBuilder<> &Builder = OMPBuilder.Builder; -+ OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP(); -+ LLVMContext &Ctx = M.getContext(); -+ Type *VoidTy = Type::getVoidTy(Ctx); -+ Type *Int32Ty = Builder.getInt32Ty(); -+ Type *PtrTy = PointerType::getUnqual(Ctx); -+ auto FuncTy = -+ FunctionType::get(VoidTy, {PtrTy, Int32Ty, PtrTy}, /* IsVarArg */ false); -+ Function *LtGCFunc = -+ Function::Create(FuncTy, GlobalVariable::InternalLinkage, -+ "_omp_reduction_global_to_list_copy_func", &M); -+ LtGCFunc->setDoesNotRecurse(); -+ -+ BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "", LtGCFunc); -+ Builder.SetInsertPoint(EntryBlock); -+ -+ // Set arg names -+ Argument *Arg0 = LtGCFunc->getArg(0); -+ Argument *Arg1 = LtGCFunc->getArg(1); -+ Argument *Arg2 = LtGCFunc->getArg(2); -+ Arg0->setName("buffer_arg"); -+ Arg1->setName("idx_arg"); -+ Arg2->setName("reduce_list_arg"); -+ -+ Value *BufferArgAlloca = -+ Builder.CreateAlloca(PtrTy, nullptr, Arg0->getName() + ".addr"); -+ Value *IdxArgAlloca = -+ Builder.CreateAlloca(Int32Ty, nullptr, Arg1->getName() + ".addr"); -+ Value *ReduceListArgAlloca = -+ Builder.CreateAlloca(PtrTy, nullptr, Arg2->getName() + ".addr"); -+ Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ BufferArgAlloca, PtrTy, BufferArgAlloca->getName() + ".acast"); -+ Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ IdxArgAlloca, PtrTy, IdxArgAlloca->getName() + ".acast"); -+ Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ ReduceListArgAlloca, PtrTy, ReduceListArgAlloca->getName() + ".acast"); -+ // FIXME(JAN): Assume a single globalized variable for now, this should be -+ // passed in -+ Type *SingleReductionTy = ReductionInfos.begin()->ElementType; -+ Type *TypeArgs[] = {SingleReductionTy}; -+ StructType *ReductionsBufferTy = -+ StructType::create(Ctx, TypeArgs, "_globalized_locals_ty"); -+ -+ Builder.CreateStore(Arg0, BufferArgAddrCast); -+ Builder.CreateStore(Arg1, IdxArgAddrCast); -+ Builder.CreateStore(Arg2, ReduceListArgAddrCast); -+ -+ Value *BufferArg = Builder.CreateLoad(PtrTy, BufferArgAddrCast, "buffer"); -+ Value *Idxs[] = { -+ Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast, "idxs")}; -+ Value *ReduceListArg = -+ Builder.CreateLoad(PtrTy, ReduceListArgAddrCast, "reduce_list"); -+ // FIXME(Jan): Assume TEK_SCALAR -+ for (auto En : enumerate(ReductionInfos)) { -+ const OpenMPIRBuilder::ReductionInfo &RI = En.value(); -+ // FIXME(Jan): Compute array type -+ auto *RedListArrayTy = ArrayType::get(PtrTy, 1); -+ Value *TargetElementPtrPtr = Builder.CreateConstInBoundsGEP2_64( -+ RedListArrayTy, ReduceListArg, 0, En.index()); -+ Value *TargetElementPtr = Builder.CreateLoad(PtrTy, TargetElementPtrPtr); -+ -+ Value *BufferVD = -+ Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArg, Idxs); -+ Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32( -+ ReductionsBufferTy, BufferVD, 0, En.index()); -+ Value *TargetElement = Builder.CreateLoad(RI.ElementType, GlobValPtr); -+ Builder.CreateStore(TargetElement, TargetElementPtr); -+ } -+ 
-+ Builder.CreateRetVoid(); -+ Builder.restoreIP(OldIP); -+ return LtGCFunc; -+} -+ -+/// This function emits a helper that reduces all the reduction variables from -+/// the team into the provided global buffer for the reduction variables. -+/// -+/// void list_to_global_reduce_func(void *buffer, int Idx, void *reduce_data) -+/// void *GlobPtrs[]; -+/// GlobPtrs[0] = (void*)&buffer.D0[Idx]; -+/// ... -+/// GlobPtrs[N] = (void*)&buffer.DN[Idx]; -+/// reduce_function(GlobPtrs, reduce_data); -+/// Create a function with a unique name and a "void (i8*, i8*)" signature in -+/// the given module and return it. -+static Function *emitListToGlobalReduceFunction( -+ Module &M, const OpenMPIRBuilder::LocationDescription &Loc, -+ ArrayRef ReductionInfos, Function *ReduceFn, -+ OpenMPIRBuilder &OMPBuilder) { -+ IRBuilder<> &Builder = OMPBuilder.Builder; -+ OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP(); -+ LLVMContext &Ctx = M.getContext(); -+ Type *VoidTy = Type::getVoidTy(Ctx); -+ Type *Int32Ty = Builder.getInt32Ty(); -+ Type *PtrTy = PointerType::getUnqual(Ctx); -+ auto FuncTy = -+ FunctionType::get(VoidTy, {PtrTy, Int32Ty, PtrTy}, /* IsVarArg */ false); -+ Function *LtGRFunc = -+ Function::Create(FuncTy, GlobalVariable::InternalLinkage, -+ "_omp_reduction_list_to_global_reduce_func", &M); -+ LtGRFunc->setDoesNotRecurse(); -+ -+ BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "", LtGRFunc); -+ Builder.SetInsertPoint(EntryBlock); -+ -+ // Set arg names -+ Argument *Arg0 = LtGRFunc->getArg(0); -+ Argument *Arg1 = LtGRFunc->getArg(1); -+ Argument *Arg2 = LtGRFunc->getArg(2); -+ Arg0->setName("buffer_arg"); -+ Arg1->setName("idx_arg"); -+ Arg2->setName("reduce_list_arg"); -+ -+ Value *BufferArgAlloca = -+ Builder.CreateAlloca(PtrTy, nullptr, Arg0->getName() + ".addr"); -+ Value *IdxArgAlloca = -+ Builder.CreateAlloca(Int32Ty, nullptr, Arg1->getName() + ".addr"); -+ Value *ReduceListArgAlloca = -+ Builder.CreateAlloca(PtrTy, nullptr, Arg2->getName() + ".addr"); -+ // FIXME(Jan): Compute array type -+ auto *RedListArrayTy = ArrayType::get(PtrTy, 1); -+ Value *LocalReduceList = -+ Builder.CreateAlloca(RedListArrayTy, nullptr, ".omp.reduction.red_list"); -+ -+ Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ BufferArgAlloca, PtrTy, BufferArgAlloca->getName() + ".acast"); -+ Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ IdxArgAlloca, PtrTy, IdxArgAlloca->getName() + ".acast"); -+ Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ ReduceListArgAlloca, PtrTy, ReduceListArgAlloca->getName() + ".acast"); -+ Value *LocalReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ LocalReduceList, PtrTy, LocalReduceList->getName() + ".acast"); -+ // FIXME(JAN): Assume a single globalized variable for now, this should be -+ // passed in -+ Type *SingleReductionTy = ReductionInfos.begin()->ElementType; -+ Type *TypeArgs[] = {SingleReductionTy}; -+ StructType *ReductionsBufferTy = -+ StructType::create(Ctx, TypeArgs, "_globalized_locals_ty"); -+ -+ Builder.CreateStore(Arg0, BufferArgAddrCast); -+ Builder.CreateStore(Arg1, IdxArgAddrCast); -+ Builder.CreateStore(Arg2, ReduceListArgAddrCast); -+ -+ Value *BufferArg = Builder.CreateLoad(PtrTy, BufferArgAddrCast, "buffer"); -+ Value *Idxs[] = {Builder.CreateLoad(Int32Ty, IdxArgAddrCast, "idxs")}; -+ // FIXME(Jan): Assume TEK_SCALAR -+ for (auto En : enumerate(ReductionInfos)) { -+ const OpenMPIRBuilder::ReductionInfo &RI = En.value(); -+ Value *TargetElementPtrPtr = 
Builder.CreateConstInBoundsGEP2_64( -+ RedListArrayTy, LocalReduceListAddrCast, 0, En.index()); -+ Value *BufferVD = -+ Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArg, Idxs); -+ Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32( -+ ReductionsBufferTy, BufferVD, 0, En.index()); -+ Builder.CreateStore(GlobValPtr, TargetElementPtrPtr); +@@ -3466,9 +3470,9 @@ + OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU( + const LocationDescription &Loc, InsertPointTy AllocaIP, + InsertPointTy CodeGenIP, ArrayRef<ReductionInfo> ReductionInfos, +- bool IsNoWait, bool IsTeamsReduction, bool HasDistribute, +- ReductionGenCBKind ReductionGenCBKind, std::optional<omp::GV> GridValue, +- unsigned ReductionBufNum, Value *SrcLocInfo) { ++ bool IsNoWait, bool IsTeamsReduction, ReductionGenCBKind ReductionGenCBKind, ++ std::optional<omp::GV> GridValue, unsigned ReductionBufNum, ++ Value *SrcLocInfo) { + if (!updateToLocation(Loc)) + return InsertPointTy(); + Builder.restoreIP(CodeGenIP); +@@ -3485,6 +3489,16 @@ + if (ReductionInfos.size() == 0) + return Builder.saveIP(); + ++ BasicBlock *ContinuationBlock = nullptr; ++ if (ReductionGenCBKind != ReductionGenCBKind::Clang) { ++ // Copied code from createReductions ++ BasicBlock *InsertBlock = Loc.IP.getBlock(); ++ ContinuationBlock = ++ InsertBlock->splitBasicBlock(Loc.IP.getPoint(), "reduce.finalize"); ++ InsertBlock->getTerminator()->eraseFromParent(); ++ Builder.SetInsertPoint(InsertBlock, InsertBlock->end()); + } + -+ Value *ReduceList = Builder.CreateLoad(PtrTy, ReduceListArgAddrCast); -+ Builder.CreateCall(ReduceFn, {LocalReduceListAddrCast, ReduceList}); -+ Builder.CreateRetVoid(); -+ Builder.restoreIP(OldIP); -+ return LtGRFunc; -+} -+ -+/// This function emits a helper that reduces all the reduction variables from -+/// the team into the provided global buffer for the reduction variables. -+/// -+/// void list_to_global_reduce_func(void *buffer, int Idx, void *reduce_data) -+/// void *GlobPtrs[]; -+/// GlobPtrs[0] = (void*)&buffer.D0[Idx]; -+/// ... -+/// GlobPtrs[N] = (void*)&buffer.DN[Idx]; -+/// reduce_function(GlobPtrs, reduce_data); - /// Create a function with a unique name and a "void (i8*, i8*)" signature in - /// the given module and return it.
--Function *getFreshReductionFunc(Module &M) { -+static Function *emitGlobalToListReduceFunction( -+ Module &M, const OpenMPIRBuilder::LocationDescription &Loc, -+ ArrayRef ReductionInfos, Function *ReduceFn, -+ OpenMPIRBuilder &OMPBuilder) { -+ IRBuilder<> &Builder = OMPBuilder.Builder; -+ OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP(); -+ LLVMContext &Ctx = M.getContext(); -+ Type *VoidTy = Type::getVoidTy(Ctx); -+ Type *Int32Ty = Builder.getInt32Ty(); -+ Type *PtrTy = PointerType::getUnqual(Ctx); -+ auto FuncTy = -+ FunctionType::get(VoidTy, {PtrTy, Int32Ty, PtrTy}, /* IsVarArg */ false); -+ Function *LtGRFunc = -+ Function::Create(FuncTy, GlobalVariable::InternalLinkage, -+ "_omp_reduction_global_to_list_reduce_func", &M); -+ LtGRFunc->setDoesNotRecurse(); -+ -+ BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "", LtGRFunc); -+ Builder.SetInsertPoint(EntryBlock); -+ -+ // Set arg names -+ Argument *Arg0 = LtGRFunc->getArg(0); -+ Argument *Arg1 = LtGRFunc->getArg(1); -+ Argument *Arg2 = LtGRFunc->getArg(2); -+ Arg0->setName("buffer_arg"); -+ Arg1->setName("idx_arg"); -+ Arg2->setName("reduce_list_arg"); -+ -+ Value *BufferArgAlloca = -+ Builder.CreateAlloca(PtrTy, nullptr, Arg0->getName() + ".addr"); -+ Value *IdxArgAlloca = -+ Builder.CreateAlloca(Int32Ty, nullptr, Arg1->getName() + ".addr"); -+ Value *ReduceListArgAlloca = -+ Builder.CreateAlloca(PtrTy, nullptr, Arg2->getName() + ".addr"); -+ // FIXME(Jan): Compute array type -+ auto *RedListArrayTy = ArrayType::get(PtrTy, 1); -+ Value *LocalReduceList = -+ Builder.CreateAlloca(RedListArrayTy, nullptr, ".omp.reduction.red_list"); -+ -+ Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ BufferArgAlloca, PtrTy, BufferArgAlloca->getName() + ".acast"); -+ Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ IdxArgAlloca, PtrTy, IdxArgAlloca->getName() + ".acast"); -+ Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ ReduceListArgAlloca, PtrTy, ReduceListArgAlloca->getName() + ".acast"); -+ Value *LocalReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ LocalReduceList, PtrTy, LocalReduceList->getName() + ".acast"); -+ // FIXME(JAN): Assume a single globalized variable for now, this should be -+ // passed in -+ Type *SingleReductionTy = ReductionInfos.begin()->ElementType; -+ Type *TypeArgs[] = {SingleReductionTy}; -+ StructType *ReductionsBufferTy = -+ StructType::create(Ctx, TypeArgs, "_globalized_locals_ty"); -+ -+ Builder.CreateStore(Arg0, BufferArgAddrCast); -+ Builder.CreateStore(Arg1, IdxArgAddrCast); -+ Builder.CreateStore(Arg2, ReduceListArgAddrCast); -+ -+ Value *BufferArg = Builder.CreateLoad(PtrTy, BufferArgAddrCast, "buffer"); -+ Value *Idxs[] = {Builder.CreateLoad(Int32Ty, IdxArgAddrCast, "idxs")}; -+ // FIXME(Jan): Assume TEK_SCALAR -+ for (auto En : enumerate(ReductionInfos)) { -+ const OpenMPIRBuilder::ReductionInfo &RI = En.value(); -+ Value *TargetElementPtrPtr = Builder.CreateConstInBoundsGEP2_64( -+ RedListArrayTy, LocalReduceListAddrCast, 0, En.index()); -+ Value *BufferVD = -+ Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArg, Idxs); -+ Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32( -+ ReductionsBufferTy, BufferVD, 0, En.index()); -+ Builder.CreateStore(GlobValPtr, TargetElementPtrPtr); + Function *CurFunc = Builder.GetInsertBlock()->getParent(); + AttributeList FuncAttrs; + AttrBuilder AttrBldr(Ctx); +@@ -3640,11 +3654,21 @@ + ReductionFunc; + }); + } else { +- assert(false && "Unhandled 
ReductionGenCBKind"); ++ Value *LHSValue = Builder.CreateLoad(RI.ElementType, LHS, "final.lhs"); ++ Value *RHSValue = Builder.CreateLoad(RI.ElementType, RHS, "final.rhs"); ++ Value *Reduced; ++ InsertPointOrErrorTy AfterIP = ++ RI.ReductionGen(Builder.saveIP(), RHSValue, LHSValue, Reduced); ++ if (!AfterIP) ++ return AfterIP.takeError(); ++ Builder.CreateStore(Reduced, LHS, false); + } + } + emitBlock(ExitBB, CurFunc); +- ++ if (ContinuationBlock) { ++ Builder.CreateBr(ContinuationBlock); ++ Builder.SetInsertPoint(ContinuationBlock); + } -+ -+ Value *ReduceList = Builder.CreateLoad(PtrTy, ReduceListArgAddrCast); -+ Builder.CreateCall(ReduceFn, {ReduceList, LocalReduceListAddrCast}); -+ Builder.CreateRetVoid(); -+ Builder.restoreIP(OldIP); -+ return LtGRFunc; -+} -+ -+static Function *getFreshReductionFunc(Module &M) { - Type *VoidTy = Type::getVoidTy(M.getContext()); - Type *Int8PtrTy = PointerType::getUnqual(M.getContext()); - auto *FuncTy = - FunctionType::get(VoidTy, {Int8PtrTy, Int8PtrTy}, /* IsVarArg */ false); - return Function::Create(FuncTy, GlobalVariable::InternalLinkage, -- M.getDataLayout().getDefaultGlobalsAddressSpace(), + Config.setEmitLLVMUsed(); + + return Builder.saveIP(); +@@ -3659,27 +3683,95 @@ ".omp.reduction.func", &M); } --OpenMPIRBuilder::InsertPointTy +-OpenMPIRBuilder::InsertPointOrErrorTy -OpenMPIRBuilder::createReductions(const LocationDescription &Loc, - InsertPointTy AllocaIP, - ArrayRef ReductionInfos, - ArrayRef IsByRef, bool IsNoWait) { - assert(ReductionInfos.size() == IsByRef.size()); - for (const ReductionInfo &RI : ReductionInfos) { -+static void populateReductionFunction( +- (void)RI; +- assert(RI.Variable && "expected non-null variable"); +- assert(RI.PrivateVariable && "expected non-null private variable"); +- assert(RI.ReductionGen && "expected non-null reduction generator callback"); +- assert(RI.Variable->getType() == RI.PrivateVariable->getType() && +- "expected variables and their private equivalents to have the same " +- "type"); +- assert(RI.Variable->getType()->isPointerTy() && +- "expected variables to be pointers"); ++static Error populateReductionFunction( + Function *ReductionFunc, + ArrayRef ReductionInfos, + IRBuilder<> &Builder, ArrayRef IsByRef, bool IsGPU) { @@ -11437,8 +11007,8 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu + } else { + LHSArrayPtr = ReductionFunc->getArg(0); + RHSArrayPtr = ReductionFunc->getArg(1); -+ } -+ + } + + unsigned NumReductions = ReductionInfos.size(); + Type *RedArrayTy = ArrayType::get(Builder.getPtrTy(), NumReductions); + @@ -11457,191 +11027,35 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu + RHSI8Ptr, RI.PrivateVariable->getType()); + Value *RHS = Builder.CreateLoad(RI.ElementType, RHSPtr); + Value *Reduced; -+ Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced)); ++ OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = ++ RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced); ++ if (!AfterIP) ++ return AfterIP.takeError(); ++ ++ Builder.restoreIP(*AfterIP); ++ // TODO: Consider flagging an error. 
+ if (!Builder.GetInsertBlock()) -+ return; ++ return Error::success(); ++ + // store is inside of the reduction region when using by-ref + if (!IsByRef[En.index()]) + Builder.CreateStore(Reduced, LHSPtr); + } + Builder.CreateRetVoid(); ++ return Error::success(); +} + -+static void -+checkReductionInfos(ArrayRef ReductionInfos, -+ bool IsGPU) { -+ for (const OpenMPIRBuilder::ReductionInfo &RI : ReductionInfos) { - (void)RI; - assert(RI.Variable && "expected non-null variable"); - assert(RI.PrivateVariable && "expected non-null private variable"); - assert(RI.ReductionGen && "expected non-null reduction generator callback"); -- assert(RI.Variable->getType() == RI.PrivateVariable->getType() && -- "expected variables and their private equivalents to have the same " -- "type"); -+ // JAN: Skip this assertion for GPU, address spaces are present -+ if (!IsGPU) { -+ assert( -+ RI.Variable->getType() == RI.PrivateVariable->getType() && -+ "expected variables and their private equivalents to have the same " -+ "type"); -+ } - assert(RI.Variable->getType()->isPointerTy() && - "expected variables to be pointers"); - } -+} -+ -+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductionsGPU( -+ const LocationDescription &Loc, InsertPointTy AllocaIP, -+ ArrayRef ReductionInfos, ArrayRef IsByRef, -+ bool IsNoWait, bool IsTeamsReduction, bool HasDistribute) { -+ checkReductionInfos(ReductionInfos, /*IsGPU*/ true); -+ LLVMContext &Ctx = M.getContext(); -+ if (!updateToLocation(Loc)) -+ return InsertPointTy(); -+ -+ if (ReductionInfos.size() == 0) -+ return Builder.saveIP(); -+ -+ assert(ReductionInfos.size() == 1 && "More than one reduction variable"); -+ -+ // Copied code from createReductions -+ BasicBlock *InsertBlock = Loc.IP.getBlock(); -+ BasicBlock *ContinuationBlock = -+ InsertBlock->splitBasicBlock(Loc.IP.getPoint(), "reduce.finalize"); -+ InsertBlock->getTerminator()->eraseFromParent(); -+ Builder.SetInsertPoint(InsertBlock, InsertBlock->end()); -+ -+ Function *ReductionFunc = nullptr; -+ if (GLOBAL_ReductionFunc) { -+ ReductionFunc = GLOBAL_ReductionFunc; -+ } else { -+ ReductionFunc = getFreshReductionFunc(M); -+ GLOBAL_ReductionFunc = ReductionFunc; -+ InsertPointTy CurIP = Builder.saveIP(); -+ populateReductionFunction(ReductionFunc, ReductionInfos, Builder, IsByRef, -+ true); -+ Builder.restoreIP(CurIP); -+ } -+ -+ uint32_t SrcLocStrSize; -+ Constant *SrcLocStr = getOrCreateDefaultSrcLocStr(SrcLocStrSize); -+ Value *RTLoc = -+ getOrCreateIdent(SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(0), 0); -+ -+ // 1. 
Build a list of reduction variables -+ auto Size = ReductionInfos.size(); -+ // FIXME(JAN): skipping variably modified type storage for array size -+ Type *PtrTy = PointerType::getUnqual(Ctx); -+ Type *RedArrayTy = ArrayType::get(PtrTy, Size); -+ InsertPointTy CurIP = Builder.saveIP(); -+ Builder.restoreIP(AllocaIP); -+ Value *ReductionListAlloca = -+ Builder.CreateAlloca(RedArrayTy, nullptr, ".omp.reduction.red_list"); -+ Value *ReductionList = -+ Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionListAlloca, PtrTy); -+ Builder.restoreIP(CurIP); -+ for (auto En : enumerate(ReductionInfos)) { -+ const ReductionInfo &RI = En.value(); -+ Value *ElemPtr = Builder.CreateConstGEP2_64(RedArrayTy, ReductionList, 0, -+ En.index(), "elem_ptr"); -+ Value *CastElem = -+ Builder.CreatePointerBitCastOrAddrSpaceCast(RI.PrivateVariable, PtrTy); -+ Builder.CreateStore(CastElem, ElemPtr); -+ } -+ CurIP = Builder.saveIP(); -+ Function *SarFunc = emitShuffleAndReduceFunction(M, Loc, ReductionInfos, -+ ReductionFunc, *this); -+ Function *WcFunc = emitInterWarpCopyFunction(M, Loc, ReductionInfos, *this); -+ Builder.restoreIP(CurIP); -+ -+ Value *RL = Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, PtrTy); -+ Value *ReductionDataSize = -+ getTypeSizeInBytesValue(Builder, M, ReductionInfos.begin()->ElementType); -+ -+ Value *Res; -+ if (!IsTeamsReduction) { -+ Value *SarFuncCast = -+ Builder.CreatePointerBitCastOrAddrSpaceCast(SarFunc, PtrTy); -+ Value *WcFuncCast = -+ Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, PtrTy); -+ Value *Args[] = {RTLoc, ReductionDataSize, RL, SarFuncCast, WcFuncCast}; -+ Function *Pv2Ptr = getOrCreateRuntimeFunctionPtr( -+ RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2); -+ Res = Builder.CreateCall(Pv2Ptr, Args); -+ } else { -+ CurIP = Builder.saveIP(); -+ Function *LtGCFunc = -+ emitListToGlobalCopyFunction(M, Loc, ReductionInfos, *this); -+ Function *LtGRFunc = emitListToGlobalReduceFunction(M, Loc, ReductionInfos, -+ ReductionFunc, *this); -+ Function *GtLCFunc = -+ emitGlobalToListCopyFunction(M, Loc, ReductionInfos, *this); -+ Function *GtLRFunc = emitGlobalToListReduceFunction(M, Loc, ReductionInfos, -+ ReductionFunc, *this); -+ Builder.restoreIP(CurIP); -+ -+ Function *RedFixedBuferFn = getOrCreateRuntimeFunctionPtr( -+ RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer); -+ -+ Value *KernelTeamsReductionPtr = Builder.CreateCall(RedFixedBuferFn, {}); -+ -+ Value *Args3[] = {RTLoc, -+ KernelTeamsReductionPtr, -+ Builder.getInt32(1024), -+ ReductionDataSize, -+ RL, -+ SarFunc, -+ WcFunc, -+ LtGCFunc, -+ LtGRFunc, -+ GtLCFunc, -+ GtLRFunc}; -+ -+ Function *TeamsReduceFn = getOrCreateRuntimeFunctionPtr( -+ RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2); -+ Res = Builder.CreateCall(TeamsReduceFn, Args3); -+ } -+ -+ if (IsTeamsReduction || !HasDistribute) { -+ Function *CurFunc = Builder.GetInsertBlock()->getParent(); -+ BasicBlock *ExitBB = -+ BasicBlock::Create(Ctx, ".omp.reduction.done", CurFunc); -+ BasicBlock *ThenBB = -+ BasicBlock::Create(Ctx, ".omp.reduction.then", CurFunc); -+ Value *Cond = Builder.CreateICmpEQ(Res, Builder.getInt32(1)); -+ Builder.CreateCondBr(Cond, ThenBB, ExitBB); -+ -+ Builder.SetInsertPoint(ThenBB); -+ for (auto En : enumerate(ReductionInfos)) { -+ const ReductionInfo &RI = En.value(); -+ Value *InputVal = Builder.CreateLoad(RI.ElementType, RI.Variable); -+ Value *RedVal = Builder.CreateLoad( -+ RI.ElementType, Builder.CreatePointerBitCastOrAddrSpaceCast( -+ RI.PrivateVariable, PtrTy)); -+ Value 
*sum; -+ Builder.restoreIP( -+ RI.ReductionGen(Builder.saveIP(), InputVal, RedVal, sum)); -+ Builder.CreateStore(sum, RI.Variable); -+ Builder.CreateBr(ExitBB); -+ } -+ Builder.SetInsertPoint(ExitBB); -+ } -+ Builder.CreateBr(ContinuationBlock); -+ Builder.SetInsertPoint(ContinuationBlock); -+ return Builder.saveIP(); -+} -+ -+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions( + const LocationDescription &Loc, InsertPointTy AllocaIP, + ArrayRef<ReductionInfo> ReductionInfos, ArrayRef<bool> IsByRef, + bool IsNoWait, bool IsTeamsReduction) { + assert(ReductionInfos.size() == IsByRef.size()); + if (Config.isGPU()) + return createReductionsGPU(Loc, AllocaIP, Builder.saveIP(), ReductionInfos, + IsNoWait, IsTeamsReduction); + + checkReductionInfos(ReductionInfos, /*IsGPU*/ false); - + if (!updateToLocation(Loc)) return InsertPointTy(); @@ -3743,9 +3835,9 @@ Switch->addCase(Builder.getInt32(1), NonAtomicRedBlock); Switch->addCase(Builder.getInt32(2), AtomicRedBlock); @@ -3801,38 +3893,13 @@ // Populate the outlined reduction function using the elementwise reduction // function. Partial values are extracted from the type-erased array of // pointers to private variables.
@@ -11673,7 +11087,11 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu - Builder.SetInsertPoint(ReductionFuncBlock); - Value *LHSArrayPtr = ReductionFunc->getArg(0); - Value *RHSArrayPtr = ReductionFunc->getArg(1); -- ++ Error Err = populateReductionFunction(ReductionFunc, ReductionInfos, Builder, ++ IsByRef, false); ++ if (Err) ++ return Err; + - for (auto En : enumerate(ReductionInfos)) { - const ReductionInfo &RI = En.value(); - Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64( @@ -11688,7 +11106,11 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu - Builder.CreateBitCast(RHSI8Ptr, RI.PrivateVariable->getType()); - Value *RHS = Builder.CreateLoad(RI.ElementType, RHSPtr); - Value *Reduced; -- Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced)); +- InsertPointOrErrorTy AfterIP = +- RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced); +- if (!AfterIP) +- return AfterIP.takeError(); +- Builder.restoreIP(*AfterIP); - if (!Builder.GetInsertBlock()) - return InsertPointTy(); - // store is inside of the reduction region when using by-ref @@ -11696,22 +11118,61 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu - Builder.CreateStore(Reduced, LHSPtr); - } - Builder.CreateRetVoid(); -- -+ populateReductionFunction(ReductionFunc, ReductionInfos, Builder, IsByRef, -+ false); ++ if (!Builder.GetInsertBlock()) ++ return InsertPointTy(); + Builder.SetInsertPoint(ContinuationBlock); return Builder.saveIP(); - } -@@ -2434,7 +3574,7 @@ - CL->assertOK(); - #endif +@@ -3984,11 +4051,9 @@ return CL; --} -+ } + } + +-Expected OpenMPIRBuilder::createCanonicalLoop( +- const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, +- Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, +- InsertPointTy ComputeIP, const Twine &Name) { +- ++Value *OpenMPIRBuilder::calculateCanonicalLoopTripCount( ++ const LocationDescription &Loc, Value *Start, Value *Stop, Value *Step, ++ bool IsSigned, bool InclusiveStop, const Twine &Name) { + // Consider the following difficulties (assuming 8-bit signed integers): + // * Adding \p Step to the loop counter which passes \p Stop may overflow: + // DO I = 1, 100, 50 +@@ -4000,9 +4065,7 @@ + assert(IndVarTy == Stop->getType() && "Stop type mismatch"); + assert(IndVarTy == Step->getType() && "Step type mismatch"); + +- LocationDescription ComputeLoc = +- ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc; +- updateToLocation(ComputeLoc); ++ updateToLocation(Loc); + + ConstantInt *Zero = ConstantInt::get(IndVarTy, 0); + ConstantInt *One = ConstantInt::get(IndVarTy, 1); +@@ -4042,8 +4105,20 @@ + Value *OneCmp = Builder.CreateICmp(CmpInst::ICMP_ULE, Span, Incr); + CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo); + } +- Value *TripCount = Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping, +- "omp_" + Name + ".tripcount"); ++ ++ return Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping, ++ "omp_" + Name + ".tripcount"); ++} ++ ++Expected OpenMPIRBuilder::createCanonicalLoop( ++ const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, ++ Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, ++ InsertPointTy ComputeIP, const Twine &Name) { ++ LocationDescription ComputeLoc = ++ ComputeIP.isSet() ? 
LocationDescription(ComputeIP, Loc.DL) : Loc; ++ ++ Value *TripCount = calculateCanonicalLoopTripCount( ++ ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name); - CanonicalLoopInfo * - OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc, -@@ -2608,7 +3748,8 @@ + auto BodyGen = [=](InsertPointTy CodeGenIP, Value *IV) { + Builder.restoreIP(CodeGenIP); +@@ -4127,7 +4202,8 @@ PUpperBound, PStride, One, Zero}); Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound); Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound); @@ -11721,7 +11182,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One); CLI->setTripCount(TripCount); -@@ -2753,6 +3894,7 @@ +@@ -4290,6 +4366,7 @@ IsLastChunk, CountUntilOrigTripCount, ChunkRange, "omp_chunk.tripcount"); Value *BackcastedChunkTC = Builder.CreateTrunc(ChunkTripCount, IVTy, "omp_chunk.tripcount.trunc"); @@ -11729,7 +11190,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu CLI->setTripCount(BackcastedChunkTC); // Update all uses of the induction variable except the one in the condition -@@ -2828,10 +3970,24 @@ +@@ -4369,10 +4446,24 @@ static void createTargetLoopWorkshareCall( OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, @@ -11756,7 +11217,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu FunctionCallee RTLFn = getKmpcForStaticLoopForType(TripCountTy, OMPBuilder, LoopType); SmallVector RealArgs; -@@ -2841,6 +3997,7 @@ +@@ -4382,6 +4473,7 @@ RealArgs.push_back(TripCount); if (LoopType == WorksharingLoopType::DistributeStaticLoop) { RealArgs.push_back(ConstantInt::get(TripCountTy, 0)); @@ -11764,7 +11225,45 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu Builder.CreateCall(RTLFn, RealArgs); return; } -@@ -4574,10 +5731,9 @@ +@@ -5307,8 +5399,8 @@ + for (auto &AlignedItem : AlignedVars) { + Value *AlignedPtr = AlignedItem.first; + Value *Alignment = AlignedItem.second; +- Builder.CreateAlignmentAssumption(F->getDataLayout(), +- AlignedPtr, Alignment); ++ Builder.CreateAlignmentAssumption(F->getDataLayout(), AlignedPtr, ++ Alignment); + } + Builder.restoreIP(IP); + } +@@ -5456,16 +5548,16 @@ + Loop *L = LI.getLoopFor(CLI->getHeader()); + assert(L && "Expecting CanonicalLoopInfo to be recognized as a loop"); + +- TargetTransformInfo::UnrollingPreferences UP = +- gatherUnrollingPreferences(L, SE, TTI, +- /*BlockFrequencyInfo=*/nullptr, +- /*ProfileSummaryInfo=*/nullptr, ORE, static_cast(OptLevel), +- /*UserThreshold=*/std::nullopt, +- /*UserCount=*/std::nullopt, +- /*UserAllowPartial=*/true, +- /*UserAllowRuntime=*/true, +- /*UserUpperBound=*/std::nullopt, +- /*UserFullUnrollMaxCount=*/std::nullopt); ++ TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( ++ L, SE, TTI, ++ /*BlockFrequencyInfo=*/nullptr, ++ /*ProfileSummaryInfo=*/nullptr, ORE, static_cast(OptLevel), ++ /*UserThreshold=*/std::nullopt, ++ /*UserCount=*/std::nullopt, ++ /*UserAllowPartial=*/true, ++ /*UserAllowRuntime=*/true, ++ /*UserUpperBound=*/std::nullopt, ++ /*UserFullUnrollMaxCount=*/std::nullopt); + + UP.Force = true; + +@@ -6128,10 +6220,11 @@ return Builder.CreateCall(Fn, Args); } @@ -11775,12 +11274,14 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetInit( + const 
LocationDescription &Loc, bool IsSPMD, + const llvm::OpenMPIRBuilder::TargetKernelDefaultBounds &Bounds) { ++ assert(!Bounds.MaxThreads.empty() && !Bounds.MaxTeams.empty() && ++ "expected num_threads and num_teams to be specified"); if (!updateToLocation(Loc)) return Loc.IP; -@@ -4592,28 +5748,32 @@ - - Function *Kernel = Builder.GetInsertBlock()->getParent(); +@@ -6156,28 +6249,32 @@ + assert(Kernel && "Expected the real kernel to exist"); + } + // Set the grid value in the config needed for lowering later on + Config.setGridValue(getGridValue(T, Kernel)); @@ -11789,7 +11290,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu // environment. - if (MinTeamsVal > 1 || MaxTeamsVal > 0) - writeTeamsForKernel(T, *Kernel, MinTeamsVal, MaxTeamsVal); -- ++ if (Bounds.MinTeams > 1 || Bounds.MaxTeams.front() > 0) ++ writeTeamsForKernel(T, *Kernel, Bounds.MinTeams, Bounds.MaxTeams.front()); + -#if FIX_NUM_THREADS_ISSUE - //breaks 534.hpgmg - // For max values, < 0 means unset, == 0 means set but unknown. @@ -11800,9 +11303,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu - - if (MaxThreadsVal > 0) - writeThreadBoundsForKernel(T, *Kernel, MinThreadsVal, MaxThreadsVal); -+ if (Bounds.MinTeams > 1 || Bounds.MaxTeams > 0) -+ writeTeamsForKernel(T, *Kernel, Bounds.MinTeams, Bounds.MaxTeams); - +- - Constant *MinThreads = ConstantInt::getSigned(Int32, MinThreadsVal); - Constant *MaxThreads = ConstantInt::getSigned(Int32, MaxThreadsVal); - Constant *MinTeams = ConstantInt::getSigned(Int32, MinTeamsVal); @@ -11811,7 +11312,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu - Constant *ReductionBufferLength = ConstantInt::getSigned(Int32, 0); + // If MaxThreads not set, select the maximum between the default workgroup + // size and the MinThreads value. -+ int32_t MaxThreadsValue = Bounds.MaxThreads; ++ int32_t MaxThreadsValue = Bounds.MaxThreads.front(); + if (MaxThreadsValue < 0) + MaxThreadsValue = std::max( + int32_t(getGridValue(T, Kernel).GV_Default_WG_Size), Bounds.MinThreads); @@ -11822,27 +11323,25 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu + Constant *MinThreads = ConstantInt::getSigned(Int32, Bounds.MinThreads); + Constant *MaxThreads = ConstantInt::getSigned(Int32, MaxThreadsValue); + Constant *MinTeams = ConstantInt::getSigned(Int32, Bounds.MinTeams); -+ Constant *MaxTeams = ConstantInt::getSigned(Int32, Bounds.MaxTeams); ++ Constant *MaxTeams = ConstantInt::getSigned(Int32, Bounds.MaxTeams.front()); + Constant *ReductionDataSize = + ConstantInt::getSigned(Int32, Bounds.ReductionDataSize); + Constant *ReductionBufferLength = + ConstantInt::getSigned(Int32, Bounds.ReductionBufferLength); - // We need to strip the debug prefix to get the correct kernel name. - StringRef KernelName = Kernel->getName(); -@@ -4672,9 +5832,8 @@ - ? 
KernelEnvironmentGV - : ConstantExpr::getAddrSpaceCast(KernelEnvironmentGV, - KernelEnvironmentPtr); -- Value *KernelLaunchEnvironment = Kernel->getArg(0); - CallInst *ThreadKind = -- Builder.CreateCall(Fn, {KernelEnvironment, KernelLaunchEnvironment}); -+ Builder.CreateCall(Fn, {KernelEnvironment, Kernel->getArg(0)}); - - Value *ExecUserCode = Builder.CreateICmpEQ( - ThreadKind, ConstantInt::get(ThreadKind->getType(), -1), -@@ -5116,6 +6275,45 @@ - return getOrCreateRuntimeFunction(M, Name); + Function *Fn = getOrCreateRuntimeFunctionPtr( + omp::RuntimeFunction::OMPRTL___kmpc_target_init); +@@ -6497,7 +6594,7 @@ + if (Config.IsTargetDevice.value_or(false)) { + if (BodyGenCB) { + InsertPointOrErrorTy AfterIP = +- BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv); ++ BodyGenCB(CodeGenIP, BodyGenTy::NoPriv); + if (!AfterIP) + return AfterIP.takeError(); + Builder.restoreIP(*AfterIP); +@@ -6762,9 +6859,49 @@ + return getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_dispatch_deinit); } +static void emitUsed(StringRef Name, std::vector &List, @@ -11884,13 +11383,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu + LLVMCompilerUsed.emplace_back(GVMode); +} + - static void replaceConstatExprUsesInFuncWithInstr(ConstantExpr *ConstExpr, - Function *Func) { - for (User *User : make_early_inc_range(ConstExpr->users())) { -@@ -5138,8 +6336,9 @@ - } - - static Function *createOutlinedFunction( + static Expected createOutlinedFunction( - OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, StringRef FuncName, - SmallVectorImpl &Inputs, + OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsSPMD, @@ -11899,9 +11392,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) { SmallVector ParameterTypes; -@@ -5165,6 +6364,24 @@ - auto Func = Function::Create(FuncType, GlobalValue::InternalLinkage, FuncName, - Builder.GetInsertBlock()->getModule()); +@@ -6792,6 +6929,24 @@ + auto Func = + Function::Create(FuncType, GlobalValue::InternalLinkage, FuncName, M); + // Forward target-cpu and target-features function attributes from the + // original function to the new outlined function. @@ -11917,14 +11410,14 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu + + if (OMPBuilder.Config.isTargetDevice()) { + std::vector LLVMCompilerUsed; -+ emitExecutionMode(OMPBuilder, Builder, FuncName, false, LLVMCompilerUsed); ++ emitExecutionMode(OMPBuilder, Builder, FuncName, IsSPMD, LLVMCompilerUsed); + Type *Int8PtrTy = Type::getInt8Ty(Builder.getContext())->getPointerTo(); + emitUsed("llvm.compiler.used", LLVMCompilerUsed, Int8PtrTy, OMPBuilder.M); + } // Save insert point. - auto OldInsertPoint = Builder.saveIP(); - -@@ -5174,7 +6391,8 @@ + IRBuilder<>::InsertPointGuard IPG(Builder); + // If there's a DISubprogram associated with current function, then +@@ -6831,7 +6986,8 @@ // Insert target init call in the device compilation pass. 
if (OMPBuilder.Config.isTargetDevice()) @@ -11934,21 +11427,20 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock(); -@@ -5241,16 +6459,18 @@ +@@ -7026,15 +7182,17 @@ } - static void emitTargetOutlinedFunction( -- OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, + static Error emitTargetOutlinedFunction( +- OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, - TargetRegionEntryInfo &EntryInfo, Function *&OutlinedFn, - Constant *&OutlinedFnID, SmallVectorImpl &Inputs, + OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsSPMD, -+ TargetRegionEntryInfo &EntryInfo, ++ bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, + const OpenMPIRBuilder::TargetKernelDefaultBounds &DefaultBounds, + Function *&OutlinedFn, Constant *&OutlinedFnID, + SmallVectorImpl &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) { - OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction = - [&OMPBuilder, &Builder, &Inputs, &CBFunc, - &ArgAccessorFuncCB](StringRef EntryFnName) { @@ -11959,129 +11451,256 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu CBFunc, ArgAccessorFuncCB); }; -@@ -5258,12 +6478,14 @@ - OutlinedFn, OutlinedFnID); - } +@@ -7331,9 +7489,11 @@ --static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, -- OpenMPIRBuilder::InsertPointTy AllocaIP, -- Function *OutlinedFn, Constant *OutlinedFnID, -- int32_t NumTeams, int32_t NumThreads, -- SmallVectorImpl &Args, -- OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB) { -+static void -+emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, + static void + emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, +- OpenMPIRBuilder::InsertPointTy AllocaIP, Function *OutlinedFn, +- Constant *OutlinedFnID, ArrayRef NumTeams, +- ArrayRef NumThreads, SmallVectorImpl &Args, + OpenMPIRBuilder::InsertPointTy AllocaIP, + const OpenMPIRBuilder::TargetKernelDefaultBounds &DefaultBounds, + const OpenMPIRBuilder::TargetKernelRuntimeBounds &RuntimeBounds, + Function *OutlinedFn, Constant *OutlinedFnID, -+ SmallVectorImpl &Args, -+ OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB) { - - OpenMPIRBuilder::TargetDataInfo Info( - /*RequiresDevicePointerInfo=*/false, -@@ -5288,22 +6510,56 @@ - unsigned NumTargetItems = MapInfo.BasePointers.size(); - // TODO: Use correct device ID - Value *DeviceID = Builder.getInt64(OMP_DEVICEID_UNDEF); -- Value *NumTeamsVal = Builder.getInt32(NumTeams); -- Value *NumThreadsVal = Builder.getInt32(NumThreads); - uint32_t SrcLocStrSize; - Constant *SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); - Value *RTLoc = OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize, - llvm::omp::IdentFlag(0), 0); -- // TODO: Use correct NumIterations -- Value *NumIterations = Builder.getInt64(0); -+ -+ Value *TripCount = RuntimeBounds.LoopTripCount -+ ? Builder.CreateIntCast(RuntimeBounds.LoopTripCount, -+ Builder.getInt64Ty(), -+ /*isSigned=*/false) -+ : Builder.getInt64(0); -+ -+ Value *NumTeams = RuntimeBounds.MaxTeams -+ ? RuntimeBounds.MaxTeams -+ : Builder.getInt32(DefaultBounds.MaxTeams); -+ -+ // Calculate number of threads: 0 if no clauses specified, otherwise it is the -+ // minimum between optional THREAD_LIMIT and MAX_THREADS clauses. Perform a -+ // type cast to uint32. 
-+ auto InitMaxThreadsClause = [&Builder](Value *Clause) { -+ if (Clause) -+ Clause = Builder.CreateIntCast(Clause, Builder.getInt32Ty(), -+ /*isSigned=*/false); -+ return Clause; ++ SmallVectorImpl &Args, Value *IfCond, + OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, + SmallVector Dependencies = {}, + bool HasNoWait = false) { +@@ -7379,9 +7539,7 @@ + return Error::success(); + }; + +- // If we don't have an ID for the target region, it means an offload entry +- // wasn't created. In this case we just run the host fallback directly. +- if (!OutlinedFnID) { ++ auto &&EmitTargetCallElse = [&]() { + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = [&]() { + if (RequiresOuterTargetTask) { + // Arguments that are intended to be directly forwarded to an +@@ -7398,66 +7556,142 @@ + // produce any. The 'if' check enables accessing the returned value. + if (AfterIP) + Builder.restoreIP(*AfterIP); + }; + -+ auto CombineMaxThreadsClauses = [&Builder](Value *Clause, Value *&Result) { -+ if (Clause) -+ Result = Result -+ ? Builder.CreateSelect(Builder.CreateICmpULT(Result, Clause), ++ auto &&EmitTargetCallThen = [&]() { ++ OpenMPIRBuilder::TargetDataInfo Info( ++ /*RequiresDevicePointerInfo=*/false, ++ /*SeparateBeginEndCalls=*/true); ++ ++ OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP()); ++ OpenMPIRBuilder::TargetDataRTArgs RTArgs; ++ OMPBuilder.emitOffloadingArraysAndArgs(AllocaIP, Builder.saveIP(), Info, ++ RTArgs, MapInfo, ++ /*IsNonContiguous=*/true, ++ /*ForEndCall=*/false); ++ ++ SmallVector NumTeamsC; ++ for (auto [DefNumTeams, RtNumTeams] : ++ llvm::zip_equal(DefaultBounds.MaxTeams, RuntimeBounds.MaxTeams)) { ++ NumTeamsC.push_back(RtNumTeams ? RtNumTeams ++ : Builder.getInt32(DefNumTeams)); ++ } ++ ++ // Calculate number of threads: 0 if no clauses specified, otherwise it is ++ // the minimum between optional THREAD_LIMIT and MAX_THREADS clauses. ++ // Perform a type cast to uint32. ++ auto InitMaxThreadsClause = [&Builder](Value *Clause) { ++ if (Clause) ++ Clause = Builder.CreateIntCast(Clause, Builder.getInt32Ty(), ++ /*isSigned=*/false); ++ return Clause; ++ }; ++ ++ auto CombineMaxThreadsClauses = [&Builder](Value *Clause, Value *&Result) { ++ if (Clause) ++ Result = ++ Result ? Builder.CreateSelect(Builder.CreateICmpULT(Result, Clause), + Result, Clause) + : Clause; -+ }; ++ }; ++ ++ // TODO: Check if this is the correct handling for multi-dim thread_limit. ++ SmallVector NumThreadsC; ++ Value *MaxThreadsClause = InitMaxThreadsClause(RuntimeBounds.MaxThreads); ++ ++ for (auto [RtTeamsThreadLimit, RtTargetThreadLimit] : llvm::zip_equal( ++ RuntimeBounds.TeamsThreadLimit, RuntimeBounds.TargetThreadLimit)) { ++ Value *TeamsThreadLimitClause = InitMaxThreadsClause(RtTeamsThreadLimit); ++ Value *NumThreads = InitMaxThreadsClause(RtTargetThreadLimit); ++ ++ CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads); ++ CombineMaxThreadsClauses(MaxThreadsClause, NumThreads); + -+ Value *MaxThreadsClause = InitMaxThreadsClause(RuntimeBounds.MaxThreads); -+ Value *TeamsThreadLimitClause = -+ InitMaxThreadsClause(RuntimeBounds.TeamsThreadLimit); -+ Value *NumThreads = InitMaxThreadsClause(RuntimeBounds.TargetThreadLimit); -+ CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads); -+ CombineMaxThreadsClauses(MaxThreadsClause, NumThreads); ++ NumThreadsC.push_back(NumThreads ? 
NumThreads : Builder.getInt32(0)); ++ } ++ ++ unsigned NumTargetItems = Info.NumberOfPtrs; ++ // TODO: Use correct device ID ++ Value *DeviceID = Builder.getInt64(OMP_DEVICEID_UNDEF); ++ uint32_t SrcLocStrSize; ++ Constant *SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); ++ Value *RTLoc = OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize, ++ llvm::omp::IdentFlag(0), 0); ++ ++ Value *TripCount = RuntimeBounds.LoopTripCount ++ ? Builder.CreateIntCast(RuntimeBounds.LoopTripCount, ++ Builder.getInt64Ty(), ++ /*isSigned=*/false) ++ : Builder.getInt64(0); ++ ++ // TODO: Use correct DynCGGroupMem ++ Value *DynCGGroupMem = Builder.getInt32(0); ++ ++ KArgs = OpenMPIRBuilder::TargetKernelArgs(NumTargetItems, RTArgs, TripCount, ++ NumTeamsC, NumThreadsC, ++ DynCGGroupMem, HasNoWait); ++ ++ // The presence of certain clauses on the target directive require the ++ // explicit generation of the target task. ++ OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = [&]() { ++ if (RequiresOuterTargetTask) ++ return OMPBuilder.emitTargetTask(TaskBodyCB, DeviceID, RTLoc, AllocaIP, ++ Dependencies, HasNoWait); ++ ++ return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID, ++ EmitTargetCallFallbackCB, KArgs, ++ DeviceID, RTLoc, AllocaIP); ++ }(); + -+ if (!NumThreads) -+ NumThreads = Builder.getInt32(0); ++ // Assume no error was returned because TaskBodyCB and ++ // EmitTargetCallFallbackCB don't produce any. The 'if' check enables ++ // accessing the returned value. ++ if (AfterIP) ++ Builder.restoreIP(*AfterIP); ++ }; + - // TODO: Use correct DynCGGroupMem - Value *DynCGGroupMem = Builder.getInt32(0); ++ // If we don't have an ID for the target region, it means an offload entry ++ // wasn't created. In this case we just run the host fallback directly. 
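`InitMaxThreadsClause` and `CombineMaxThreadsClauses` above encode a simple rule: the per-dimension thread bound is the minimum of whichever of the target-level `thread_limit`, teams-level `thread_limit` and max-threads values are present, and 0 ("let the runtime decide") when none are. The same rule over plain integers, as a hedged standalone illustration:

```c++
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <optional>

// Combine an optional clause value into the running result, keeping the
// smaller of the two, just as CombineMaxThreadsClauses does with
// CreateICmpULT/CreateSelect on IR values.
static void combineClause(std::optional<uint32_t> Clause,
                          std::optional<uint32_t> &Result) {
  if (!Clause)
    return;
  Result = Result ? std::min(*Result, *Clause) : *Clause;
}

static uint32_t numThreadsFor(std::optional<uint32_t> TargetThreadLimit,
                              std::optional<uint32_t> TeamsThreadLimit,
                              std::optional<uint32_t> MaxThreads) {
  std::optional<uint32_t> NumThreads = TargetThreadLimit;
  combineClause(TeamsThreadLimit, NumThreads);
  combineClause(MaxThreads, NumThreads);
  return NumThreads.value_or(0); // 0 == no clause specified
}

int main() {
  std::cout << numThreadsFor({}, {}, {}) << "\n";    // 0: runtime picks
  std::cout << numThreadsFor(256, {}, {}) << "\n";   // 256
  std::cout << numThreadsFor(256, 128, 512) << "\n"; // 128: smallest clause wins
  return 0;
}
```

Returning 0 when no clause is present leaves the choice of block size entirely to the device runtime, which matches the comment in the hunk above.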
++ if (!OutlinedFnID) { ++ EmitTargetCallElse(); + return; + } + +- OpenMPIRBuilder::TargetDataInfo Info( +- /*RequiresDevicePointerInfo=*/false, +- /*SeparateBeginEndCalls=*/true); +- +- OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP()); +- OpenMPIRBuilder::TargetDataRTArgs RTArgs; +- OMPBuilder.emitOffloadingArraysAndArgs(AllocaIP, Builder.saveIP(), Info, +- RTArgs, MapInfo, +- /*IsNonContiguous=*/true, +- /*ForEndCall=*/false); - - bool HasNoWait = false; - -- OpenMPIRBuilder::TargetKernelArgs KArgs(NumTargetItems, RTArgs, NumIterations, -- NumTeamsVal, NumThreadsVal, -- DynCGGroupMem, HasNoWait); -+ OpenMPIRBuilder::TargetKernelArgs KArgs(NumTargetItems, RTArgs, TripCount, -+ NumTeams, NumThreads, DynCGGroupMem, -+ HasNoWait); - - Builder.restoreIP(OMPBuilder.emitKernelLaunch( - Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, KArgs, -@@ -5311,10 +6567,11 @@ +- SmallVector NumTeamsC; +- SmallVector NumThreadsC; +- for (auto V : NumTeams) +- NumTeamsC.push_back(llvm::ConstantInt::get(Builder.getInt32Ty(), V)); +- for (auto V : NumThreads) +- NumThreadsC.push_back(llvm::ConstantInt::get(Builder.getInt32Ty(), V)); +- +- unsigned NumTargetItems = Info.NumberOfPtrs; +- // TODO: Use correct device ID +- Value *DeviceID = Builder.getInt64(OMP_DEVICEID_UNDEF); +- uint32_t SrcLocStrSize; +- Constant *SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); +- Value *RTLoc = OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize, +- llvm::omp::IdentFlag(0), 0); +- // TODO: Use correct NumIterations +- Value *NumIterations = Builder.getInt64(0); +- // TODO: Use correct DynCGGroupMem +- Value *DynCGGroupMem = Builder.getInt32(0); +- +- KArgs = OpenMPIRBuilder::TargetKernelArgs( +- NumTargetItems, RTArgs, NumIterations, NumTeamsC, NumThreadsC, +- DynCGGroupMem, HasNoWait); +- +- // The presence of certain clauses on the target directive require the +- // explicit generation of the target task. +- OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = [&]() { +- if (RequiresOuterTargetTask) +- return OMPBuilder.emitTargetTask(TaskBodyCB, DeviceID, RTLoc, AllocaIP, +- Dependencies, HasNoWait); +- +- return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID, +- EmitTargetCallFallbackCB, KArgs, +- DeviceID, RTLoc, AllocaIP); +- }(); ++ // If there's no IF clause, only generate the kernel launch code path. ++ if (!IfCond) { ++ EmitTargetCallThen(); ++ return; ++ } + +- // Assume no error was returned because TaskBodyCB and +- // EmitTargetCallFallbackCB don't produce any. The 'if' check enables +- // accessing the returned value. +- if (AfterIP) +- Builder.restoreIP(*AfterIP); ++ // Create if-else to handle IF clause. ++ llvm::BasicBlock *ThenBlock = ++ BasicBlock::Create(Builder.getContext(), "omp_if.then"); ++ llvm::BasicBlock *ElseBlock = ++ BasicBlock::Create(Builder.getContext(), "omp_if.else"); ++ llvm::BasicBlock *ContBlock = ++ BasicBlock::Create(Builder.getContext(), "omp_if.end"); ++ Builder.CreateCondBr(IfCond, ThenBlock, ElseBlock); ++ ++ Function *CurFn = Builder.GetInsertBlock()->getParent(); ++ ++ // Emit the 'then' code. ++ OMPBuilder.emitBlock(ThenBlock, CurFn); ++ EmitTargetCallThen(); ++ OMPBuilder.emitBranch(ContBlock); ++ // Emit the 'else' code. ++ OMPBuilder.emitBlock(ElseBlock, CurFn); ++ EmitTargetCallElse(); ++ OMPBuilder.emitBranch(ContBlock); ++ // Emit the continuation block. 
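Together, `EmitTargetCallThen`, `EmitTargetCallElse` and the checks above form a three-way decision: no offload entry means host fallback only, no `if` clause means an unconditional kernel launch, and otherwise an `omp_if.then`/`omp_if.else`/`omp_if.end` diamond selects between the two at run time. A plain C++ sketch of that decision logic (host-side analogy; the function names are hypothetical):

```c++
#include <iostream>

// Host-side analogy of emitTargetCall's structure: the two lambdas stand in
// for the code emitted into omp_if.then / omp_if.else, and the booleans stand
// in for OutlinedFnID and the evaluated IF-clause condition.
static void targetCallSketch(bool HasOffloadEntry, bool HasIfClause,
                             bool IfCondValue) {
  auto EmitTargetCallThen = [] { std::cout << "launch device kernel\n"; };
  auto EmitTargetCallElse = [] { std::cout << "run host fallback\n"; };

  if (!HasOffloadEntry) { // no ID for the region: fall back directly
    EmitTargetCallElse();
    return;
  }
  if (!HasIfClause) { // unconditional launch path
    EmitTargetCallThen();
    return;
  }
  if (IfCondValue) // the emitted IR does this with a conditional branch
    EmitTargetCallThen();
  else
    EmitTargetCallElse();
}

int main() {
  targetCallSketch(false, false, false); // host fallback
  targetCallSketch(true, false, false);  // kernel launch
  targetCallSketch(true, true, false);   // IF clause evaluated false
  return 0;
}
```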
++ OMPBuilder.emitBlock(ContBlock, CurFn, /*IsFinished=*/true); } - OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTarget( -- const LocationDescription &Loc, InsertPointTy AllocaIP, -- InsertPointTy CodeGenIP, TargetRegionEntryInfo &EntryInfo, int32_t NumTeams, -- int32_t NumThreads, SmallVectorImpl &Args, -- GenMapInfoCallbackTy GenMapInfoCB, -+ const LocationDescription &Loc, bool IsSPMD, InsertPointTy AllocaIP, -+ InsertPointTy CodeGenIP, TargetRegionEntryInfo &EntryInfo, + OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTarget( +- const LocationDescription &Loc, bool IsOffloadEntry, InsertPointTy AllocaIP, +- InsertPointTy CodeGenIP, TargetRegionEntryInfo &EntryInfo, +- ArrayRef NumTeams, ArrayRef NumThreads, ++ const LocationDescription &Loc, bool IsSPMD, bool IsOffloadEntry, ++ Value *IfCond, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ++ TargetRegionEntryInfo &EntryInfo, + const TargetKernelDefaultBounds &DefaultBounds, + const TargetKernelRuntimeBounds &RuntimeBounds, -+ SmallVectorImpl &Args, GenMapInfoCallbackTy GenMapInfoCB, + SmallVectorImpl &Args, GenMapInfoCallbackTy GenMapInfoCB, OpenMPIRBuilder::TargetBodyGenCallbackTy CBFunc, - OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB) { - if (!updateToLocation(Loc)) -@@ -5324,11 +6581,12 @@ - - Function *OutlinedFn; - Constant *OutlinedFnID; -- emitTargetOutlinedFunction(*this, Builder, EntryInfo, OutlinedFn, -- OutlinedFnID, Args, CBFunc, ArgAccessorFuncCB); -+ emitTargetOutlinedFunction(*this, Builder, IsSPMD, EntryInfo, DefaultBounds, -+ OutlinedFn, OutlinedFnID, Args, CBFunc, -+ ArgAccessorFuncCB); + OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB, +@@ -7474,16 +7708,17 @@ + // the target region itself is generated using the callbacks CBFunc + // and ArgAccessorFuncCB + if (Error Err = emitTargetOutlinedFunction( +- *this, Builder, IsOffloadEntry, EntryInfo, OutlinedFn, OutlinedFnID, +- Args, CBFunc, ArgAccessorFuncCB)) ++ *this, Builder, IsSPMD, IsOffloadEntry, EntryInfo, DefaultBounds, ++ OutlinedFn, OutlinedFnID, Args, CBFunc, ArgAccessorFuncCB)) + return Err; + + // If we are not on the target device, then we need to generate code + // to make a remote call (offload) to the previously outlined function + // that represents the target region. Do that now. 
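The `InsertPointOrErrorTy` plumbing used throughout this hunk (`if (Error Err = ...) return Err;`, `takeError()`, and `assert(AfterIP && ...)` in callers whose callbacks cannot fail) follows LLVM's usual `Expected<T>`/`Error` pattern. A small self-contained sketch of that pattern, deliberately unrelated to the builder itself:

```c++
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Callees return Expected values; callers either forward the error with
// takeError() or unwrap the value once it is known to be present.
static Expected<int> mayFail(bool Fail) {
  if (Fail)
    return createStringError(inconvertibleErrorCode(),
                             "body generation failed");
  return 42;
}

static Expected<int> caller(bool Fail) {
  Expected<int> V = mayFail(Fail);
  if (!V)
    return V.takeError(); // mirrors `if (!AfterIP) return AfterIP.takeError();`
  return *V + 1;
}

int main() {
  if (Expected<int> R = caller(false))
    outs() << "ok: " << *R << "\n";
  else
    logAllUnhandledErrors(R.takeError(), errs(), "error: ");
  return 0;
}
```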
if (!Config.isTargetDevice()) - emitTargetCall(*this, Builder, AllocaIP, OutlinedFn, OutlinedFnID, NumTeams, -- NumThreads, Args, GenMapInfoCB); +- NumThreads, Args, GenMapInfoCB, Dependencies, HasNowait); + emitTargetCall(*this, Builder, AllocaIP, DefaultBounds, RuntimeBounds, -+ OutlinedFn, OutlinedFnID, Args, GenMapInfoCB); - ++ OutlinedFn, OutlinedFnID, Args, IfCond, GenMapInfoCB, ++ Dependencies, HasNowait); return Builder.saveIP(); } -@@ -6360,6 +7618,43 @@ - } - OpenMPIRBuilder::InsertPointTy +@@ -8590,6 +8825,44 @@ + + return Builder.saveIP(); + } ++ ++OpenMPIRBuilder::InsertPointOrErrorTy +OpenMPIRBuilder::createDistribute(const LocationDescription &Loc, + InsertPointTy OuterAllocaIP, + BodyGenCallbackTy BodyGenCB) { @@ -12105,7 +11724,8 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu + // Generate the body of distribute clause + InsertPointTy AllocaIP(AllocaBB, AllocaBB->begin()); + InsertPointTy CodeGenIP(BodyBB, BodyBB->begin()); -+ BodyGenCB(AllocaIP, CodeGenIP); ++ if (Error Err = BodyGenCB(AllocaIP, CodeGenIP)) ++ return Err; + + OutlineInfo OI; + OI.OuterAllocaBB = OuterAllocaIP.getBlock(); @@ -12117,123 +11737,40 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu + + return Builder.saveIP(); +} -+ -+OpenMPIRBuilder::InsertPointTy + + OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTeams(const LocationDescription &Loc, - BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower, - Value *NumTeamsUpper, Value *ThreadLimit, -@@ -6488,7 +7783,6 @@ - Builder.CreateCall(getOrCreateRuntimeFunctionPtr( - omp::RuntimeFunction::OMPRTL___kmpc_fork_teams), - Args); -- - while (!ToBeDeleted.empty()) { - ToBeDeleted.top()->eraseFromParent(); - ToBeDeleted.pop(); -diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Transforms/IPO/OpenMPOpt.cpp llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp ---- llvm-project.orig/llvm/lib/Transforms/IPO/OpenMPOpt.cpp 2024-06-12 10:43:13.628199897 -0500 -+++ llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp 2024-06-12 10:44:09.355614196 -0500 -@@ -287,6 +287,20 @@ - OpenMPPostLink(OpenMPPostLink) { - - OMPBuilder.Config.IsTargetDevice = isOpenMPDevice(OMPBuilder.M); -+ const Triple T(OMPBuilder.M.getTargetTriple()); -+ switch (T.getArch()) { -+ case llvm::Triple::nvptx: -+ case llvm::Triple::nvptx64: -+ case llvm::Triple::amdgcn: -+ assert(OMPBuilder.Config.IsTargetDevice && -+ "OpenMP AMDGPU/NVPTX is only prepared to deal with device code."); -+ OMPBuilder.Config.IsGPU = true; -+ break; -+ default: -+ OMPBuilder.Config.IsGPU = false; -+ break; -+ } -+ - OMPBuilder.initialize(); - initializeRuntimeFunctions(M); - initializeInternalControlVars(); -@@ -535,6 +549,7 @@ - void recollectUses() { - for (int Idx = 0; Idx < RFIs.size(); ++Idx) - recollectUsesForFunction(static_cast(Idx)); -+ OMPBuilder.Config.IsTargetDevice = isOpenMPDevice(OMPBuilder.M); +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/llvm/lib/Transforms/Utils/CodeExtractor.cpp llvm-project-aso/llvm/lib/Transforms/Utils/CodeExtractor.cpp +--- llvm-project-aso-orig/llvm/lib/Transforms/Utils/CodeExtractor.cpp 2024-11-23 20:25:27.183273941 -0600 ++++ llvm-project-aso/llvm/lib/Transforms/Utils/CodeExtractor.cpp 2024-11-23 20:39:47.196175308 -0600 +@@ -1801,7 +1801,7 @@ + ReloadOutputs.push_back(alloca); } - // Helper function to inherit the calling convention of the function callee. 
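The OpenMPOpt.cpp lines above key `Config.IsGPU` off the module's target triple (nvptx, nvptx64, amdgcn). A standalone sketch of that check using `llvm::Triple`; the triples in `main` are examples only:

```c++
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"

using namespace llvm;

// Returns true for the GPU architectures the OpenMP device runtime targets.
static bool isOpenMPGPUTriple(StringRef TT) {
  Triple T(TT);
  switch (T.getArch()) {
  case Triple::nvptx:
  case Triple::nvptx64:
  case Triple::amdgcn:
    return true;
  default:
    return false;
  }
}

int main() {
  for (StringRef TT : {"amdgcn-amd-amdhsa", "nvptx64-nvidia-cuda",
                       "x86_64-unknown-linux-gnu"})
    outs() << TT << " -> " << (isOpenMPGPUTriple(TT) ? "GPU" : "host") << "\n";
  return 0;
}
```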
-diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Transforms/Utils/CodeExtractor.cpp llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp ---- llvm-project.orig/llvm/lib/Transforms/Utils/CodeExtractor.cpp 2024-06-12 10:43:13.660199561 -0500 -+++ llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp 2024-06-12 10:44:09.355614196 -0500 -@@ -1196,6 +1196,7 @@ - - StructType *StructArgTy = nullptr; - AllocaInst *Struct = nullptr; -+ Instruction *StructSpaceCast = nullptr; - unsigned NumAggregatedInputs = 0; - if (AggregateArgs && !StructValues.empty()) { - std::vector ArgTypes; -@@ -1214,20 +1215,34 @@ +- AllocaInst *Struct = nullptr; ++ Instruction *Struct = nullptr; + if (!StructValues.empty()) { + Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, + "structArg", AllocaBlock->getFirstInsertionPt()); +@@ -1809,11 +1809,11 @@ + auto *StructSpaceCast = new AddrSpaceCastInst( Struct, PointerType ::get(Context, 0), "structArg.ascast"); StructSpaceCast->insertAfter(Struct); - params.push_back(StructSpaceCast); -+ // Store aggregated inputs in the struct. -+ for (unsigned i = 0, e = StructValues.size(); i != e; ++i) { -+ if (inputs.contains(StructValues[i])) { -+ Value *Idx[2]; -+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); -+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); -+ GetElementPtrInst *GEP = -+ GetElementPtrInst::Create(StructArgTy, StructSpaceCast, Idx, -+ "gep_" + StructValues[i]->getName()); -+ GEP->insertInto(codeReplacer, codeReplacer->end()); -+ new StoreInst(StructValues[i], GEP, codeReplacer); -+ NumAggregatedInputs++; -+ } -+ } - } else { - params.push_back(Struct); -- } -- // Store aggregated inputs in the struct. -- for (unsigned i = 0, e = StructValues.size(); i != e; ++i) { -- if (inputs.contains(StructValues[i])) { -- Value *Idx[2]; -- Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); -- Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); -- GetElementPtrInst *GEP = GetElementPtrInst::Create( -- StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); -- GEP->insertInto(codeReplacer, codeReplacer->end()); -- new StoreInst(StructValues[i], GEP, codeReplacer); -- NumAggregatedInputs++; -+ // Store aggregated inputs in the struct. -+ for (unsigned i = 0, e = StructValues.size(); i != e; ++i) { -+ if (inputs.contains(StructValues[i])) { -+ Value *Idx[2]; -+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); -+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); -+ GetElementPtrInst *GEP = GetElementPtrInst::Create( -+ StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); -+ GEP->insertInto(codeReplacer, codeReplacer->end()); -+ new StoreInst(StructValues[i], GEP, codeReplacer); -+ NumAggregatedInputs++; -+ } - } +- params.push_back(StructSpaceCast); +- } else { +- params.push_back(Struct); ++ Struct = StructSpaceCast; } - } -@@ -1262,7 +1277,8 @@ - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), aggIdx); - GetElementPtrInst *GEP = GetElementPtrInst::Create( -- StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); -+ StructArgTy, StructSpaceCast ? 
StructSpaceCast : Struct, Idx, -+ "gep_reload_" + outputs[i]->getName()); - GEP->insertInto(codeReplacer, codeReplacer->end()); - Output = GEP; - ++aggIdx; -diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp llvm-project/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp ---- llvm-project.orig/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp 2024-06-12 10:43:15.156183831 -0500 -+++ llvm-project/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp 2024-06-12 10:44:09.359614154 -0500 -@@ -600,6 +600,7 @@ + ++ params.push_back(Struct); ++ + unsigned AggIdx = 0; + for (Value *input : inputs) { + if (!StructValues.contains(input)) +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp llvm-project-aso/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +--- llvm-project-aso-orig/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp 2024-10-29 11:07:19.981633529 -0500 ++++ llvm-project-aso/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp 2024-11-23 20:39:47.196175308 -0600 +@@ -629,6 +629,7 @@ "256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"); OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = true; @@ -12241,7 +11778,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -707,6 +708,7 @@ +@@ -741,6 +742,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12249,7 +11786,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -812,6 +814,7 @@ +@@ -851,6 +853,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12257,7 +11794,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -906,6 +909,7 @@ +@@ -951,6 +954,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12265,7 +11802,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -1015,6 +1019,7 @@ +@@ -1068,6 +1072,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12273,7 +11810,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -1120,6 +1125,7 @@ +@@ -1176,6 +1181,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12281,7 +11818,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -1234,6 +1240,7 @@ +@@ -1298,6 +1304,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) { OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12289,7 +11826,37 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -2237,6 +2244,7 @@ +@@ -1420,8 +1427,7 @@ + 
EXPECT_EQ(&Loop->getAfter()->front(), RetInst); + } + +-TEST_F(OpenMPIRBuilderTest, CanonicalLoopBounds) { +- using InsertPointTy = OpenMPIRBuilder::InsertPointTy; ++TEST_F(OpenMPIRBuilderTest, CanonicalLoopTripCount) { + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + IRBuilder<> Builder(BB); +@@ -1437,17 +1443,8 @@ + Value *StartVal = ConstantInt::get(LCTy, Start); + Value *StopVal = ConstantInt::get(LCTy, Stop); + Value *StepVal = ConstantInt::get(LCTy, Step); +- auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) { +- return Error::success(); +- }; +- Expected LoopResult = +- OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal, +- StepVal, IsSigned, InclusiveStop); +- assert(LoopResult && "unexpected error"); +- CanonicalLoopInfo *Loop = *LoopResult; +- Loop->assertOK(); +- Builder.restoreIP(Loop->getAfterIP()); +- Value *TripCount = Loop->getTripCount(); ++ Value *TripCount = OMPBuilder.calculateCanonicalLoopTripCount( ++ Loc, StartVal, StopVal, StepVal, IsSigned, InclusiveStop); + return cast(TripCount)->getValue().getZExtValue(); + }; + +@@ -2332,6 +2329,7 @@ "256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"); OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = true; @@ -12297,7 +11864,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); IRBuilder<> Builder(BB); OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); -@@ -2291,14 +2299,21 @@ +@@ -2389,14 +2387,21 @@ // Check that no variables except for loop counter are used in loop body EXPECT_EQ(Constant::getNullValue(Builder.getPtrTy()), WorkshareLoopRuntimeCall->getArgOperand(2)); @@ -12321,7 +11888,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); IRBuilder<> Builder(BB); OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); -@@ -2400,6 +2415,7 @@ +@@ -2503,6 +2508,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12329,7 +11896,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR BasicBlock *Body; CallInst *Call; -@@ -2475,6 +2491,7 @@ +@@ -2579,6 +2585,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12337,7 +11904,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); IRBuilder<> Builder(BB); OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); -@@ -2633,6 +2650,7 @@ +@@ -2742,6 +2749,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12345,7 +11912,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); IRBuilder<> Builder(BB); OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); -@@ -4336,6 +4354,7 @@ +@@ -4481,6 +4489,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12353,7 +11920,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -4412,6 +4431,7 @@ +@@ -4560,6 +4569,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice 
= false; @@ -12361,7 +11928,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> &Builder = OMPBuilder.Builder; -@@ -4463,6 +4483,7 @@ +@@ -4613,6 +4623,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12369,7 +11936,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> &Builder = OMPBuilder.Builder; -@@ -4515,6 +4536,7 @@ +@@ -4669,6 +4680,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12377,7 +11944,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> &Builder = OMPBuilder.Builder; -@@ -4570,6 +4592,7 @@ +@@ -4727,6 +4739,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12385,7 +11952,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> &Builder = OMPBuilder.Builder; -@@ -4630,6 +4653,7 @@ +@@ -4790,6 +4803,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12393,7 +11960,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> &Builder = OMPBuilder.Builder; -@@ -4689,6 +4713,7 @@ +@@ -4852,6 +4866,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12401,7 +11968,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> &Builder = OMPBuilder.Builder; -@@ -4887,6 +4912,7 @@ +@@ -5053,6 +5068,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12409,7 +11976,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -5122,6 +5148,7 @@ +@@ -5298,6 +5314,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12417,18 +11984,27 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -5972,7 +5999,9 @@ +@@ -6182,9 +6199,17 @@ + TargetRegionEntryInfo EntryInfo("func", 42, 4711, 17); OpenMPIRBuilder::LocationDescription OmpLoc({Builder.saveIP(), DL}); - Builder.restoreIP(OMPBuilder.createTarget( -- OmpLoc, Builder.saveIP(), Builder.saveIP(), EntryInfo, -1, 0, Inputs, -+ OmpLoc, /*IsSPMD=*/false, Builder.saveIP(), Builder.saveIP(), EntryInfo, -+ /*DefaultBounds=*/OpenMPIRBuilder::TargetKernelDefaultBounds(), -+ /*RuntimeBounds=*/OpenMPIRBuilder::TargetKernelRuntimeBounds(), Inputs, - GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB)); ++ OpenMPIRBuilder::TargetKernelDefaultBounds DefaultBounds; ++ DefaultBounds.MaxTeams.push_back(-1); ++ DefaultBounds.MaxThreads.push_back(-1); ++ OpenMPIRBuilder::TargetKernelRuntimeBounds RuntimeBounds; ++ 
RuntimeBounds.TargetThreadLimit.push_back(nullptr); ++ RuntimeBounds.TeamsThreadLimit.push_back(nullptr); ++ RuntimeBounds.MaxTeams.push_back(nullptr); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTarget( +- OmpLoc, /*IsOffloadEntry=*/true, Builder.saveIP(), Builder.saveIP(), +- EntryInfo, -1, 0, Inputs, GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB); ++ OmpLoc, /*IsSPMD=*/false, /*IsOffloadEntry=*/true, /*IfCond=*/nullptr, ++ Builder.saveIP(), Builder.saveIP(), EntryInfo, DefaultBounds, ++ RuntimeBounds, Inputs, GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); OMPBuilder.finalize(); - Builder.CreateRetVoid(); -@@ -6012,6 +6041,7 @@ +@@ -6229,6 +6254,7 @@ } TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) { @@ -12436,23 +12012,30 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.setConfig( OpenMPIRBuilderConfig(true, false, false, false, false, false, false)); -@@ -6075,10 +6105,11 @@ +@@ -6292,11 +6318,17 @@ TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2, /*Line=*/3, /*Count=*/0); -- Builder.restoreIP( -- OMPBuilder.createTarget(Loc, EntryIP, EntryIP, EntryInfo, /*NumTeams=*/-1, +- OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = +- OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP, +- EntryInfo, /*NumTeams=*/-1, - /*NumThreads=*/0, CapturedArgs, GenMapInfoCB, -- BodyGenCB, SimpleArgAccessorCB)); -+ Builder.restoreIP(OMPBuilder.createTarget( -+ Loc, /*IsSPMD=*/false, EntryIP, EntryIP, EntryInfo, -+ /*DefaultBounds=*/OpenMPIRBuilder::TargetKernelDefaultBounds(), -+ /*RuntimeBounds=*/OpenMPIRBuilder::TargetKernelRuntimeBounds(), -+ CapturedArgs, GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB)); - - Builder.CreateRetVoid(); - OMPBuilder.finalize(); -@@ -6158,6 +6189,7 @@ +- BodyGenCB, SimpleArgAccessorCB); ++ OpenMPIRBuilder::TargetKernelDefaultBounds DefaultBounds; ++ DefaultBounds.MaxTeams.push_back(-1); ++ DefaultBounds.MaxThreads.push_back(-1); ++ OpenMPIRBuilder::TargetKernelRuntimeBounds RuntimeBounds; ++ RuntimeBounds.TargetThreadLimit.push_back(nullptr); ++ RuntimeBounds.TeamsThreadLimit.push_back(nullptr); ++ RuntimeBounds.MaxTeams.push_back(nullptr); ++ OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTarget( ++ Loc, /*IsSPMD=*/false, /*IsOffloadEntry=*/true, /*IfCond=*/nullptr, ++ EntryIP, EntryIP, EntryInfo, DefaultBounds, RuntimeBounds, CapturedArgs, ++ GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); + +@@ -6378,6 +6410,7 @@ } TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) { @@ -12460,23 +12043,30 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.setConfig( OpenMPIRBuilderConfig(true, false, false, false, false, false, false)); -@@ -6223,10 +6255,11 @@ +@@ -6443,11 +6476,17 @@ TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2, /*Line=*/3, /*Count=*/0); -- Builder.restoreIP( -- OMPBuilder.createTarget(Loc, EntryIP, EntryIP, EntryInfo, /*NumTeams=*/-1, +- OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = +- OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP, +- EntryInfo, /*NumTeams=*/-1, - /*NumThreads=*/0, CapturedArgs, GenMapInfoCB, -- BodyGenCB, SimpleArgAccessorCB)); -+ Builder.restoreIP(OMPBuilder.createTarget( -+ Loc, /*IsSPMD=*/false, EntryIP, EntryIP, EntryInfo, -+ 
/*DefaultBounds=*/OpenMPIRBuilder::TargetKernelDefaultBounds(), -+ /*RuntimeBounds=*/OpenMPIRBuilder::TargetKernelRuntimeBounds(), -+ CapturedArgs, GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB)); - - Builder.CreateRetVoid(); - OMPBuilder.finalize(); -@@ -6311,6 +6344,7 @@ +- BodyGenCB, SimpleArgAccessorCB); ++ OpenMPIRBuilder::TargetKernelDefaultBounds DefaultBounds; ++ DefaultBounds.MaxTeams.push_back(-1); ++ DefaultBounds.MaxThreads.push_back(-1); ++ OpenMPIRBuilder::TargetKernelRuntimeBounds RuntimeBounds; ++ RuntimeBounds.TargetThreadLimit.push_back(nullptr); ++ RuntimeBounds.TeamsThreadLimit.push_back(nullptr); ++ RuntimeBounds.MaxTeams.push_back(nullptr); ++ OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTarget( ++ Loc, /*IsSPMD=*/false, /*IsOffloadEntry=*/true, /*IfCond=*/nullptr, ++ EntryIP, EntryIP, EntryInfo, DefaultBounds, RuntimeBounds, CapturedArgs, ++ GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); + +@@ -6534,6 +6573,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12484,7 +12074,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -6440,6 +6474,7 @@ +@@ -6665,6 +6705,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12492,7 +12082,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -6471,6 +6506,7 @@ +@@ -6699,6 +6740,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12500,7 +12090,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -6501,6 +6537,7 @@ +@@ -6733,6 +6775,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12508,7 +12098,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -6575,6 +6612,7 @@ +@@ -6811,6 +6854,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12516,7 +12106,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -6629,6 +6667,7 @@ +@@ -6870,6 +6914,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12524,7 +12114,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -6790,6 +6829,7 @@ +@@ -7037,6 +7082,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12532,192 +12122,292 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td 
llvm-project/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td ---- llvm-project.orig/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td 2024-06-12 10:43:15.228183074 -0500 -+++ llvm-project/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td 2024-06-12 10:44:09.359614154 -0500 -@@ -102,8 +102,14 @@ - - Operation &firstOp = *r.op_begin(); - Operation &secondOp = *(std::next(r.op_begin())); -- return ::llvm::isa(firstOp) && -- secondOp.hasTrait(); -+ -+ if (!secondOp.hasTrait()) -+ return false; -+ -+ if (auto wrapper = ::llvm::dyn_cast(firstOp)) -+ return wrapper.isWrapper(); -+ -+ return ::llvm::isa(firstOp); - }] - >, - InterfaceMethod< -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td llvm-project/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td ---- llvm-project.orig/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td 2024-06-12 10:43:15.228183074 -0500 -+++ llvm-project/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td 2024-06-12 10:44:09.359614154 -0500 -@@ -467,7 +467,7 @@ - def WsloopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments, - DeclareOpInterfaceMethods, - RecursiveMemoryEffects, ReductionClauseInterface, -- SingleBlockImplicitTerminator<"TerminatorOp">]> { -+ SingleBlock]> { - let summary = "worksharing-loop construct"; - let description = [{ - The worksharing-loop construct specifies that the iterations of the loop(s) -@@ -577,8 +577,7 @@ - - def SimdOp : OpenMP_Op<"simd", [AttrSizedOperandSegments, - DeclareOpInterfaceMethods, -- RecursiveMemoryEffects, -- SingleBlockImplicitTerminator<"TerminatorOp">]> { -+ RecursiveMemoryEffects, SingleBlock]> { - let summary = "simd construct"; - let description = [{ - The simd construct can be applied to a loop to indicate that the loop can be -@@ -682,8 +681,7 @@ - //===----------------------------------------------------------------------===// - def DistributeOp : OpenMP_Op<"distribute", [AttrSizedOperandSegments, - DeclareOpInterfaceMethods, -- RecursiveMemoryEffects, -- SingleBlockImplicitTerminator<"TerminatorOp">]> { -+ RecursiveMemoryEffects, SingleBlock]> { - let summary = "distribute construct"; - let description = [{ - The distribute construct specifies that the iterations of one or more loops -@@ -856,7 +854,7 @@ - AutomaticAllocationScope, - DeclareOpInterfaceMethods, - RecursiveMemoryEffects, ReductionClauseInterface, -- SingleBlockImplicitTerminator<"TerminatorOp">]> { -+ SingleBlock]> { - let summary = "taskloop construct"; - let description = [{ - The taskloop construct specifies that the iterations of one or more -@@ -1567,13 +1565,16 @@ - - The optional $thread_limit specifies the limit on the number of threads +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/docs/Dialects/OpenMPDialect/_index.md llvm-project-aso/mlir/docs/Dialects/OpenMPDialect/_index.md +--- llvm-project-aso-orig/mlir/docs/Dialects/OpenMPDialect/_index.md 2024-10-18 17:40:33.824978757 -0500 ++++ llvm-project-aso/mlir/docs/Dialects/OpenMPDialect/_index.md 2024-11-23 20:39:47.200175294 -0600 +@@ -297,7 +297,8 @@ + introduction of private copies of the same underlying variable defined outside + the MLIR operation the clause is attached to. Currently, clauses with this + property can be classified into three main categories: +- - Map-like clauses: `map`, `use_device_addr` and `use_device_ptr`. ++ - Map-like clauses: `host_eval`, `map`, `use_device_addr` and ++`use_device_ptr`. + - Reduction-like clauses: `in_reduction`, `reduction` and `task_reduction`. + - Privatization clauses: `private`. 
+ +@@ -522,3 +523,58 @@ + omp.terminator + } {omp.composite} + ``` ++ ++## Host-Evaluated Clauses in Target Regions ++ ++The `omp.target` operation, which represents the OpenMP `target` construct, is ++marked with the `IsolatedFromAbove` trait. This means that, inside of its ++region, no MLIR values defined outside of the op itself can be used. This is ++consistent with the OpenMP specification of the `target` construct, which ++mandates that all host device values used inside of the `target` region must ++either be privatized (data-sharing) or mapped (data-mapping). ++ ++Normally, clauses applied to a construct are evaluated before entering that ++construct. Further, in some cases, the OpenMP specification stipulates that ++clauses be evaluated _on the host device_ on entry to a parent `target` ++construct. In particular, the `num_teams` and `thread_limit` clauses of the ++`teams` construct must be evaluated on the host device if it's nested inside or ++combined with a `target` construct. ++ ++Additionally, the runtime library targeted by the MLIR to LLVM IR translation of ++the OpenMP dialect supports the optimized launch of SPMD kernels (i.e. ++`target teams distribute parallel {do,for}` in OpenMP), which requires ++specifying in advance what the total trip count of the loop is. Consequently, it ++is also beneficial to evaluate the trip count on the host device prior to the ++kernel launch. ++ ++These host-evaluated values in MLIR would need to be placed outside of the ++`omp.target` region and also attached to the corresponding nested operations, ++which is not possible because of the `IsolatedFromAbove` trait. The solution ++implemented to address this problem has been to introduce the `host_eval` ++argument to the `omp.target` operation. It works similarly to a `map` clause, ++but its only intended use is to forward host-evaluated values to their ++corresponding operation inside of the region. Any uses outside of the previously ++described result in a verifier error. ++ ++```mlir ++// Initialize %0, %1, %2, %3... ++omp.target host_eval(%0 -> %nt, %1 -> %lb, %2 -> %ub, %3 -> %step : i32, i32, i32, i32) { ++ omp.teams num_teams(to %nt : i32) { ++ omp.parallel { ++ omp.distribute { ++ omp.wsloop { ++ omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) { ++ // ... ++ omp.yield ++ } ++ omp.terminator ++ } {omp.composite} ++ omp.terminator ++ } {omp.composite} ++ omp.terminator ++ } {omp.composite} ++ omp.terminator ++ } ++ omp.terminator ++} ++``` +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/include/mlir/Conversion/Passes.td llvm-project-aso/mlir/include/mlir/Conversion/Passes.td +--- llvm-project-aso-orig/mlir/include/mlir/Conversion/Passes.td 2024-09-24 18:07:10.079914647 -0500 ++++ llvm-project-aso/mlir/include/mlir/Conversion/Passes.td 2024-11-23 20:39:47.200175294 -0600 +@@ -754,7 +754,7 @@ + } -- The optional $nowait eliminates the implicit barrier so the parent task can make progress -- even if the target task is not yet completed. -+ The optional $trip_count indicates the total number of loop iterations, only if this -+ target region represents a single teams+distribute+parallel worksharing loop. + //===----------------------------------------------------------------------===// +-// MathToLibm ++// MathToROCDL + //===----------------------------------------------------------------------===// - The `depends` and `depend_vars` arguments are variadic lists of values - that specify the dependencies of this particular target task in relation to - other tasks. 
+ def ConvertMathToROCDL : Pass<"convert-math-to-rocdl", "ModuleOp"> { +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h llvm-project-aso/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h +--- llvm-project-aso-orig/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h 2024-09-13 09:46:39.630282131 -0500 ++++ llvm-project-aso/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h 2024-11-23 20:39:47.200175294 -0600 +@@ -41,6 +41,12 @@ + // Extra operation operand structures. + //===----------------------------------------------------------------------===// -+ The optional $nowait eliminates the implicit barrier so the parent task can make progress -+ even if the target task is not yet completed. ++/// Clauses that correspond to operations other than omp.target, but might have ++/// to be evaluated outside of a parent target region. ++using HostEvaluatedOperands = ++ detail::Clauses; + - The optional $is_device_ptr indicates list items are device pointers. + // TODO: Add `indirect` clause. + using DeclareTargetOperands = detail::Clauses; - The optional $has_device_addr indicates that list items already have device -@@ -1583,13 +1584,22 @@ - The optional $map_operands maps data from the task’s environment to the - device environment. +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td llvm-project-aso/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td +--- llvm-project-aso-orig/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td 2024-11-14 15:28:41.918639373 -0600 ++++ llvm-project-aso/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td 2024-11-23 20:39:47.200175294 -0600 +@@ -445,6 +445,44 @@ + def OpenMP_HintClause : OpenMP_HintClauseSkip<>; -- TODO: defaultmap, in_reduction -+ The optional $num_teams_lower, $num_teams_upper and $teams_thread_limit -+ arguments represent the corresponding arguments of a directly nested TeamsOp. They -+ can be only set in this operation when representing combined or composite constructs -+ that include TARGET and TEAMS, so that they can be evaluated in the host device. + //===----------------------------------------------------------------------===// ++// Not in the spec: Clause-like structure to hold host-evaluated values. ++//===----------------------------------------------------------------------===// + -+ The optional $num_threads argument represents the corresponding argument of a nested -+ ParallelOp, which is only allowed if this target region contains a single (possibly -+ multi-level) nest of OpenMP operations including a ParallelOp. 
- -+ TODO: defaultmap, in_reduction - }]; - - let arguments = (ins Optional:$if_expr, - Optional:$device, - Optional:$thread_limit, -+ Optional:$trip_count, - OptionalAttr:$depends, - Variadic:$depend_vars, - UnitAttr:$nowait, -@@ -1597,7 +1607,11 @@ - Variadic:$has_device_addr, - Variadic:$map_operands, - Variadic:$private_vars, -- OptionalAttr:$privatizers); -+ OptionalAttr:$privatizers, -+ Optional:$num_teams_lower, -+ Optional:$num_teams_upper, -+ Optional:$teams_thread_limit, -+ Optional:$num_threads); - - let regions = (region AnyRegion:$region); - -@@ -1609,16 +1623,36 @@ - oilist( `if` `(` $if_expr `)` - | `device` `(` $device `:` type($device) `)` - | `thread_limit` `(` $thread_limit `:` type($thread_limit) `)` -+ | `trip_count` `(` $trip_count `:` type($trip_count) `)` - | `nowait` $nowait - | `is_device_ptr` `(` $is_device_ptr `:` type($is_device_ptr) `)` - | `has_device_addr` `(` $has_device_addr `:` type($has_device_addr) `)` - | `map_entries` `(` custom($map_operands, type($map_operands)) `)` - | `private` `(` custom($private_vars, type($private_vars), $privatizers) `)` - | `depend` `(` custom($depend_vars, type($depend_vars), $depends) `)` -+ | `num_teams` `(` ( $num_teams_lower^ `:` type($num_teams_lower) )? `to` -+ $num_teams_upper `:` type($num_teams_upper) `)` -+ | `teams_thread_limit` `(` $teams_thread_limit `:` type($teams_thread_limit) `)` -+ | `num_threads` `(` $num_threads `:` type($num_threads) `)` - ) $region attr-dict - }]; - - let hasVerifier = 1; ++class OpenMP_HostEvalClauseSkip< ++ bit traits = false, bit arguments = false, bit assemblyFormat = false, ++ bit description = false, bit extraClassDeclaration = false ++ > : OpenMP_Clause { ++ let traits = [ ++ BlockArgOpenMPOpInterface ++ ]; ++ ++ let arguments = (ins ++ Variadic:$host_eval_vars ++ ); + + let extraClassDeclaration = [{ -+ /// Returns the innermost OpenMP dialect operation nested inside of this -+ /// operation's region. For an operation to be detected as captured, it must -+ /// be inside a (possibly multi-level) nest of OpenMP dialect operation's ++ unsigned numHostEvalBlockArgs() { ++ return getHostEvalVars().size(); ++ } ++ }]; ++ ++ let description = [{ ++ The optional `host_eval_vars` holds values defined outside of the region of ++ the `IsolatedFromAbove` operation for which a corresponding entry block ++ argument is defined. The only legal uses for these captured values are the ++ following: ++ - `num_teams` or `thread_limit` clause of an immediately nested ++ `omp.teams` operation. ++ - If the operation is the top-level `omp.target` of a target SPMD kernel: ++ - `num_threads` clause of the nested `omp.parallel` operation. ++ - Bounds and steps of the nested `omp.loop_nest` operation. ++ }]; ++} ++ ++def OpenMP_HostEvalClause : OpenMP_HostEvalClauseSkip<>; ++ ++//===----------------------------------------------------------------------===// + // V5.2: [3.4] `if` clause + //===----------------------------------------------------------------------===// + +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td llvm-project-aso/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td +--- llvm-project-aso-orig/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td 2024-10-18 17:40:33.840978593 -0500 ++++ llvm-project-aso/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td 2024-11-23 20:39:47.200175294 -0600 +@@ -25,6 +25,10 @@ + + let methods = [ + // Default-implemented methods to be overriden by the corresponding clauses. 
++ InterfaceMethod<"Get number of block arguments defined by `host_eval`.", ++ "unsigned", "numHostEvalBlockArgs", (ins), [{}], [{ ++ return 0; ++ }]>, + InterfaceMethod<"Get number of block arguments defined by `in_reduction`.", + "unsigned", "numInReductionBlockArgs", (ins), [{}], [{ + return 0; +@@ -55,9 +59,14 @@ + }]>, + + // Unified access methods for clause-associated entry block arguments. ++ InterfaceMethod<"Get start index of block arguments defined by `host_eval`.", ++ "unsigned", "getHostEvalBlockArgsStart", (ins), [{ ++ return 0; ++ }]>, + InterfaceMethod<"Get start index of block arguments defined by `in_reduction`.", + "unsigned", "getInReductionBlockArgsStart", (ins), [{ +- return 0; ++ auto iface = ::llvm::cast(*$_op); ++ return iface.getHostEvalBlockArgsStart() + $_op.numHostEvalBlockArgs(); + }]>, + InterfaceMethod<"Get start index of block arguments defined by `map`.", + "unsigned", "getMapBlockArgsStart", (ins), [{ +@@ -91,6 +100,13 @@ + return iface.getUseDeviceAddrBlockArgsStart() + $_op.numUseDeviceAddrBlockArgs(); + }]>, + ++ InterfaceMethod<"Get block arguments defined by `host_eval`.", ++ "::llvm::MutableArrayRef<::mlir::BlockArgument>", ++ "getHostEvalBlockArgs", (ins), [{ ++ auto iface = ::llvm::cast(*$_op); ++ return $_op->getRegion(0).getArguments().slice( ++ iface.getHostEvalBlockArgsStart(), $_op.numHostEvalBlockArgs()); ++ }]>, + InterfaceMethod<"Get block arguments defined by `in_reduction`.", + "::llvm::MutableArrayRef<::mlir::BlockArgument>", + "getInReductionBlockArgs", (ins), [{ +@@ -147,10 +163,11 @@ + + let verify = [{ + auto iface = ::llvm::cast($_op); +- unsigned expectedArgs = iface.numInReductionBlockArgs() + +- iface.numMapBlockArgs() + iface.numPrivateBlockArgs() + +- iface.numReductionBlockArgs() + iface.numTaskReductionBlockArgs() + +- iface.numUseDeviceAddrBlockArgs() + iface.numUseDevicePtrBlockArgs(); ++ unsigned expectedArgs = iface.numHostEvalBlockArgs() + ++ iface.numInReductionBlockArgs() + iface.numMapBlockArgs() + ++ iface.numPrivateBlockArgs() + iface.numReductionBlockArgs() + ++ iface.numTaskReductionBlockArgs() + iface.numUseDeviceAddrBlockArgs() + ++ iface.numUseDevicePtrBlockArgs(); + if ($_op->getRegion(0).getNumArguments() < expectedArgs) + return $_op->emitOpError() << "expected at least " << expectedArgs + << " entry block argument(s)"; +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td llvm-project-aso/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +--- llvm-project-aso-orig/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td 2024-11-23 20:25:27.479272877 -0600 ++++ llvm-project-aso/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td 2024-11-23 20:39:47.200175294 -0600 +@@ -1213,9 +1213,10 @@ + ], clauses = [ + // TODO: Complete clause list (defaultmap, uses_allocators). + OpenMP_AllocateClause, OpenMP_DependClause, OpenMP_DeviceClause, +- OpenMP_HasDeviceAddrClause, OpenMP_IfClause, OpenMP_InReductionClause, +- OpenMP_IsDevicePtrClause, OpenMP_MapClauseSkip, +- OpenMP_NowaitClause, OpenMP_PrivateClause, OpenMP_ThreadLimitClause ++ OpenMP_HasDeviceAddrClause, OpenMP_HostEvalClause, OpenMP_IfClause, ++ OpenMP_InReductionClause, OpenMP_IsDevicePtrClause, ++ OpenMP_MapClauseSkip, OpenMP_NowaitClause, ++ OpenMP_PrivateClause, OpenMP_ThreadLimitClause + ], singleRegion = true> { + let summary = "target construct"; + let description = [{ +@@ -1225,6 +1226,21 @@ + The optional `if_expr` parameter specifies a boolean result of a conditional + check. 
If this value is 1 or is not provided then the target region runs on + a device, if it is 0 then the target region is executed on the host device. ++ ++ The optional `trip_count` indicates the total number of loop iterations, ++ only if this target region represents a single TEAMS+DISTRIBUTE+PARALLEL ++ worksharing loop. ++ ++ The optional `num_teams_lower`, `num_teams_upper` and `teams_thread_limit` ++ arguments represent the corresponding arguments of a directly nested ++ `omp.teams`. They can be only set in this operation when representing ++ combined constructs that include TARGET and TEAMS, so that they can be ++ evaluated in the host device. ++ ++ The optional `num_threads` argument represents the corresponding argument of ++ a nested `omp.parallel`, which is only allowed if this target region ++ contains a single (possibly multi-level) nest of OpenMP operations including ++ an `omp.parallel`. + }] # clausesDescription; + + let builders = [ +@@ -1233,13 +1249,30 @@ + + let extraClassDeclaration = [{ + unsigned numMapBlockArgs() { return getMapVars().size(); } ++ ++ /// Returns the innermost OpenMP dialect operation captured by this target ++ /// construct. For an operation to be detected as captured, it must be ++ /// inside a (possibly multi-level) nest of OpenMP dialect operation's + /// regions where none of these levels contain other operations considered + /// not-allowed for these purposes (i.e. only terminator operations are + /// allowed from the OpenMP dialect, and other dialect's operations are + /// allowed as long as they don't have a memory write effect). ++ /// ++ /// If there are omp.loop_nest operations in the sequence of nested ++ /// operations, the top level one will be the one captured. + Operation *getInnermostCapturedOmpOp(); + + /// Tells whether this target region represents a single worksharing loop + /// wrapped by omp.teams omp.distribute and omp.parallel constructs. 
+ bool isTargetSPMDLoop(); -+ }]; - } - + }] # clausesExtraClassDeclaration; + + let assemblyFormat = clausesAssemblyFormat # [{ +- custom( +- $region, $in_reduction_vars, type($in_reduction_vars), +- $in_reduction_byref, $in_reduction_syms, $map_vars, type($map_vars), +- $private_vars, type($private_vars), $private_syms) attr-dict ++ custom( ++ $region, $host_eval_vars, type($host_eval_vars), $in_reduction_vars, ++ type($in_reduction_vars), $in_reduction_byref, $in_reduction_syms, ++ $map_vars, type($map_vars), $private_vars, type($private_vars), ++ $private_syms) attr-dict + }]; -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp llvm-project/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp ---- llvm-project.orig/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp 2024-06-12 10:43:15.264182696 -0500 -+++ llvm-project/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp 2024-06-12 10:44:09.359614154 -0500 -@@ -233,11 +233,11 @@ - }); - target.addDynamicallyLegalOp< - mlir::omp::AtomicUpdateOp, mlir::omp::CriticalOp, mlir::omp::TargetOp, -- mlir::omp::TargetDataOp, mlir::omp::LoopNestOp, -- mlir::omp::OrderedRegionOp, mlir::omp::ParallelOp, mlir::omp::WsloopOp, -- mlir::omp::SimdOp, mlir::omp::MasterOp, mlir::omp::SectionOp, -- mlir::omp::SectionsOp, mlir::omp::SingleOp, mlir::omp::TaskgroupOp, -- mlir::omp::TaskOp, mlir::omp::DeclareReductionOp, -+ mlir::omp::TeamsOp, mlir::omp::DistributeOp, mlir::omp::TargetDataOp, -+ mlir::omp::LoopNestOp, mlir::omp::OrderedRegionOp, mlir::omp::ParallelOp, -+ mlir::omp::WsloopOp, mlir::omp::SimdOp, mlir::omp::MasterOp, -+ mlir::omp::SectionOp, mlir::omp::SectionsOp, mlir::omp::SingleOp, -+ mlir::omp::TaskgroupOp, mlir::omp::TaskOp, mlir::omp::DeclareReductionOp, - mlir::omp::PrivateClauseOp>([&](Operation *op) { - return std::all_of(op->getRegions().begin(), op->getRegions().end(), - [&](Region ®ion) { -@@ -268,6 +268,7 @@ - RegionOpConversion, RegionOpConversion, - RegionOpConversion, RegionOpConversion, - RegionOpConversion, RegionOpConversion, -+ RegionOpConversion, RegionOpConversion, - RegionLessOpWithVarOperandsConversion, - RegionOpWithVarOperandsConversion, - RegionLessOpWithVarOperandsConversion, -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp llvm-project/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp ---- llvm-project.orig/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp 2024-06-12 10:43:15.292182402 -0500 -+++ llvm-project/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp 2024-06-12 10:44:09.359614154 -0500 -@@ -19,7 +19,6 @@ + let hasVerifier = 1; +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp llvm-project-aso/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +--- llvm-project-aso-orig/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp 2024-11-23 20:25:27.491272834 -0600 ++++ llvm-project-aso/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp 2024-11-23 20:39:47.200175294 -0600 +@@ -20,7 +20,6 @@ #include "mlir/IR/DialectImplementation.h" #include "mlir/IR/OpImplementation.h" #include "mlir/IR/OperationSupport.h" @@ -12725,46 +12415,93 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Dialect/OpenMP/IR/OpenM #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" -@@ -62,15 +61,6 @@ - LLVM::LLVMPointerType> { - Type getElementType(Type pointer) const { return Type(); } +@@ -502,6 +501,7 @@ + : vars(vars), types(types), byref(byref), syms(syms) {} }; -- --struct OpenMPDialectFoldInterface : public 
DialectFoldInterface { -- using DialectFoldInterface::DialectFoldInterface; -- -- bool shouldMaterializeInto(Region *region) const final { -- // Avoid folding constants across target regions -- return isa(region->getParentOp()); -- } --}; - } // namespace + struct AllRegionParseArgs { ++ std::optional hostEvalArgs; + std::optional inReductionArgs; + std::optional mapArgs; + std::optional privateArgs; +@@ -628,6 +628,11 @@ + AllRegionParseArgs args) { + llvm::SmallVector entryBlockArgs; + ++ if (failed(parseBlockArgClause(parser, entryBlockArgs, "host_eval", ++ args.hostEvalArgs))) ++ return parser.emitError(parser.getCurrentLocation()) ++ << "invalid `host_eval` format"; ++ + if (failed(parseBlockArgClause(parser, entryBlockArgs, "in_reduction", + args.inReductionArgs))) + return parser.emitError(parser.getCurrentLocation()) +@@ -666,8 +671,10 @@ + return parser.parseRegion(region, entryBlockArgs); + } + +-static ParseResult parseInReductionMapPrivateRegion( ++static ParseResult parseHostEvalInReductionMapPrivateRegion( + OpAsmParser &parser, Region ®ion, ++ SmallVectorImpl &hostEvalVars, ++ SmallVectorImpl &hostEvalTypes, + SmallVectorImpl &inReductionVars, + SmallVectorImpl &inReductionTypes, + DenseBoolArrayAttr &inReductionByref, ArrayAttr &inReductionSyms, +@@ -676,6 +683,7 @@ + llvm::SmallVectorImpl &privateVars, + llvm::SmallVectorImpl &privateTypes, ArrayAttr &privateSyms) { + AllRegionParseArgs args; ++ args.hostEvalArgs.emplace(hostEvalVars, hostEvalTypes); + args.inReductionArgs.emplace(inReductionVars, inReductionTypes, + inReductionByref, inReductionSyms); + args.mapArgs.emplace(mapVars, mapTypes); +@@ -789,6 +797,7 @@ + : vars(vars), types(types), byref(byref), syms(syms) {} + }; + struct AllRegionPrintArgs { ++ std::optional hostEvalArgs; + std::optional inReductionArgs; + std::optional mapArgs; + std::optional privateArgs; +@@ -867,6 +876,8 @@ + auto iface = llvm::cast(op); + MLIRContext *ctx = op->getContext(); + ++ printBlockArgClause(p, ctx, "host_eval", iface.getHostEvalBlockArgs(), ++ args.hostEvalArgs); + printBlockArgClause(p, ctx, "in_reduction", iface.getInReductionBlockArgs(), + args.inReductionArgs); + printBlockArgClause(p, ctx, "map_entries", iface.getMapBlockArgs(), +@@ -887,12 +898,14 @@ + p.printRegion(region, /*printEntryBlockArgs=*/false); + } + +-static void printInReductionMapPrivateRegion( +- OpAsmPrinter &p, Operation *op, Region ®ion, ValueRange inReductionVars, ++static void printHostEvalInReductionMapPrivateRegion( ++ OpAsmPrinter &p, Operation *op, Region ®ion, ValueRange hostEvalVars, ++ TypeRange hostEvalTypes, ValueRange inReductionVars, + TypeRange inReductionTypes, DenseBoolArrayAttr inReductionByref, + ArrayAttr inReductionSyms, ValueRange mapVars, TypeRange mapTypes, + ValueRange privateVars, TypeRange privateTypes, ArrayAttr privateSyms) { + AllRegionPrintArgs args; ++ args.hostEvalArgs.emplace(hostEvalVars, hostEvalTypes); + args.inReductionArgs.emplace(inReductionVars, inReductionTypes, + inReductionByref, inReductionSyms); + args.mapArgs.emplace(mapVars, mapTypes); +@@ -1652,20 +1665,170 @@ + // inReductionByref, inReductionSyms. 
+ TargetOp::build(builder, state, /*allocate_vars=*/{}, /*allocator_vars=*/{}, + makeArrayAttr(ctx, clauses.dependKinds), clauses.dependVars, +- clauses.device, clauses.hasDeviceAddrVars, clauses.ifExpr, ++ clauses.device, clauses.hasDeviceAddrVars, ++ clauses.hostEvalVars, clauses.ifExpr, + /*in_reduction_vars=*/{}, /*in_reduction_byref=*/nullptr, + /*in_reduction_syms=*/nullptr, clauses.isDevicePtrVars, + clauses.mapVars, clauses.nowait, clauses.privateVars, + makeArrayAttr(ctx, clauses.privateSyms), clauses.threadLimit); + } - void OpenMPDialect::initialize() { -@@ -87,7 +77,6 @@ - #include "mlir/Dialect/OpenMP/OpenMPOpsTypes.cpp.inc" - >(); - -- addInterface(); - MemRefType::attachInterface(*getContext()); - LLVM::LLVMPointerType::attachInterface( - *getContext()); -@@ -1417,19 +1406,170 @@ - // reductionDeclSymbols. - TargetOp::build( - builder, state, clauses.ifVar, clauses.deviceVar, clauses.threadLimitVar, -- makeArrayAttr(ctx, clauses.dependTypeAttrs), clauses.dependVars, -- clauses.nowaitAttr, clauses.isDevicePtrVars, clauses.hasDeviceAddrVars, -- clauses.mapVars, clauses.privateVars, -- makeArrayAttr(ctx, clauses.privatizers)); -+ /*trip_count=*/nullptr, makeArrayAttr(ctx, clauses.dependTypeAttrs), -+ clauses.dependVars, clauses.nowaitAttr, clauses.isDevicePtrVars, -+ clauses.hasDeviceAddrVars, clauses.mapVars, clauses.privateVars, -+ makeArrayAttr(ctx, clauses.privatizers), /*num_teams_lower=*/nullptr, -+ /*num_teams_upper=*/nullptr, /*teams_thread_limit=*/nullptr, -+ /*num_threads=*/nullptr); -+} -+ +/// Only allow OpenMP terminators and non-OpenMP ops that have known memory +/// effects, but don't include a memory write effect. +static bool siblingAllowedInCapture(Operation *op) { @@ -12806,87 +12543,88 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Dialect/OpenMP/IR/OpenM + return success(); +} + -+template -+static OpTy getSingleNestedOpOfType(Region ®ion) { -+ auto ops = region.getOps(); -+ return std::distance(ops.begin(), ops.end()) != 1 ? OpTy() : *ops.begin(); - } - LogicalResult TargetOp::verify() { + auto teamsOps = getOps(); + if (std::distance(teamsOps.begin(), teamsOps.end()) > 1) + return emitError("target containing multiple teams constructs"); + -+ if (!isTargetSPMDLoop()) { -+ if (getTripCount()) -+ return emitError("trip_count set on non-SPMD target region"); ++ // Check that host_eval values are only used in legal ways. 
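++ //
++ // Illustrative sketch only (not code from this patch; value names and the
++ // exact assembly syntax are assumptions): a host_eval block argument may
++ // only be used as num_teams/thread_limit of a nested omp.teams, as
++ // num_threads of omp.parallel when this is a target SPMD loop, or as
++ // omp.loop_nest bounds and steps in that same case. For example:
++ //
++ //   omp.target host_eval(%nt -> %arg0 : i32) {
++ //     omp.teams num_teams(to %arg0 : i32) {
++ //       ...
++ //     }
++ //   }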
++ bool isTargetSPMD = isTargetSPMDLoop(); ++ for (Value hostEvalArg : ++ cast(getOperation()).getHostEvalBlockArgs()) { ++ for (Operation *user : hostEvalArg.getUsers()) { ++ if (auto teamsOp = dyn_cast(user)) { ++ if (llvm::is_contained({teamsOp.getNumTeamsLower(), ++ teamsOp.getNumTeamsUpper(), ++ teamsOp.getThreadLimit()}, ++ hostEvalArg)) ++ continue; ++ ++ return emitOpError() << "host_eval argument only legal as 'num_teams' " ++ "and 'thread_limit' in 'omp.teams'"; ++ } ++ if (auto parallelOp = dyn_cast(user)) { ++ if (isTargetSPMD && hostEvalArg == parallelOp.getNumThreads()) ++ continue; + -+ if (getNumThreads() && !getSingleNestedOpOfType(getRegion())) -+ return emitError("num_threads set on non-SPMD or loop target region"); -+ } ++ return emitOpError() ++ << "host_eval argument only legal as 'num_threads' in " ++ "'omp.parallel' when representing target SPMD"; ++ } ++ if (auto loopNestOp = dyn_cast(user)) { ++ if (isTargetSPMD && ++ (llvm::is_contained(loopNestOp.getLoopLowerBounds(), hostEvalArg) || ++ llvm::is_contained(loopNestOp.getLoopUpperBounds(), hostEvalArg) || ++ llvm::is_contained(loopNestOp.getLoopSteps(), hostEvalArg))) ++ continue; ++ ++ return emitOpError() ++ << "host_eval argument only legal as loop bounds and steps in " ++ "'omp.loop_nest' when representing target SPMD"; ++ } + -+ if (teamsOps.empty()) { -+ if (getNumTeamsLower() || getNumTeamsUpper() || getTeamsThreadLimit()) -+ return emitError( -+ "num_teams and teams_thread_limit arguments only allowed if there is " -+ "an omp.teams child operation"); -+ } else { -+ if (failed(verifyNumTeamsClause(*this, getNumTeamsLower(), -+ getNumTeamsUpper()))) -+ return failure(); ++ return emitOpError() << "host_eval argument illegal use in '" ++ << user->getName() << "' operation"; ++ } + } + LogicalResult verifyDependVars = - verifyDependVarList(*this, getDepends(), getDependVars()); + verifyDependVarList(*this, getDependKinds(), getDependVars()); return failed(verifyDependVars) ? verifyDependVars - : verifyMapClause(*this, getMapOperands()); + : verifyMapClause(*this, getMapVars()); } +Operation *TargetOp::getInnermostCapturedOmpOp() { + Dialect *ompDialect = (*this)->getDialect(); + Operation *capturedOp = nullptr; -+ Region *capturedParentRegion = nullptr; + -+ walk([&](Operation *op) { ++ // Process in pre-order to check operations from outermost to innermost, ++ // ensuring we only enter the region of an operation if it meets the criteria ++ // for being captured. We stop the exploration of nested operations as soon as ++ // we process a region with no operation to be captured. ++ walk([&](Operation *op) { + if (op == *this) -+ return; ++ return WalkResult::advance(); + ++ // Ignore operations of other dialects or omp operations with no regions, ++ // because these will only be checked if they are siblings of an omp ++ // operation that can potentially be captured. + bool isOmpDialect = op->getDialect() == ompDialect; + bool hasRegions = op->getNumRegions() > 0; -+ -+ if (capturedOp) { -+ bool isImmediateParent = false; -+ for (Region ®ion : op->getRegions()) { -+ if (®ion == capturedParentRegion) { -+ isImmediateParent = true; -+ capturedParentRegion = op->getParentRegion(); -+ break; -+ } -+ } -+ -+ // Make sure the captured op is part of a (possibly multi-level) nest of -+ // OpenMP-only operations containing no unsupported siblings at any level. 
-+ if ((hasRegions && isOmpDialect != isImmediateParent) || -+ (!isImmediateParent && !siblingAllowedInCapture(op))) { -+ capturedOp = nullptr; -+ capturedParentRegion = nullptr; -+ } -+ } else { -+ // The first OpenMP dialect op containing a region found while visiting -+ // in post-order should be the innermost captured OpenMP operation. -+ if (isOmpDialect && hasRegions) { -+ capturedOp = op; -+ capturedParentRegion = op->getParentRegion(); -+ -+ // Don't capture this op if it has a not-allowed sibling. -+ for (Operation &sibling : op->getParentRegion()->getOps()) { -+ if (&sibling != op && !siblingAllowedInCapture(&sibling)) { -+ capturedOp = nullptr; -+ capturedParentRegion = nullptr; -+ } -+ } -+ } -+ } ++ if (!isOmpDialect || !hasRegions) ++ return WalkResult::skip(); ++ ++ // Don't capture this op if it has a not-allowed sibling, and stop recursing ++ // into nested operations. ++ for (Operation &sibling : op->getParentRegion()->getOps()) ++ if (&sibling != op && !siblingAllowedInCapture(&sibling)) ++ return WalkResult::interrupt(); ++ ++ // Don't continue capturing nested operations if we reach an omp.loop_nest. ++ // Otherwise, process the contents of this operation. ++ capturedOp = op; ++ return llvm::isa(op) ? WalkResult::interrupt() ++ : WalkResult::advance(); + }); + + return capturedOp; @@ -12897,24 +12635,23 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Dialect/OpenMP/IR/OpenM + if (!isa_and_present(capturedOp)) + return false; + -+ Operation *workshareOp = capturedOp->getParentOp(); -+ + // Accept optional SIMD leaf construct. ++ Operation *workshareOp = capturedOp->getParentOp(); + if (isa_and_present(workshareOp)) + workshareOp = workshareOp->getParentOp(); + + if (!isa_and_present(workshareOp)) + return false; + -+ Operation *parallelOp = workshareOp->getParentOp(); -+ if (!isa_and_present(parallelOp)) ++ Operation *distributeOp = workshareOp->getParentOp(); ++ if (!isa_and_present(distributeOp)) + return false; + -+ Operation *distributeOp = parallelOp->getParentOp(); -+ if (!isa_and_present(distributeOp)) ++ Operation *parallelOp = distributeOp->getParentOp(); ++ if (!isa_and_present(parallelOp)) + return false; + -+ Operation *teamsOp = distributeOp->getParentOp(); ++ Operation *teamsOp = parallelOp->getParentOp(); + if (!isa_and_present(teamsOp)) + return false; + @@ -12924,25 +12661,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Dialect/OpenMP/IR/OpenM //===----------------------------------------------------------------------===// // ParallelOp //===----------------------------------------------------------------------===// -@@ -1525,6 +1665,17 @@ - return emitError( - "expected equal sizes for allocate and allocator variables"); - -+ auto offloadModOp = -+ llvm::cast(*(*this)->getParentOfType()); -+ if (!offloadModOp.getIsTargetDevice()) { -+ auto targetOp = (*this)->getParentOfType(); -+ if (getNumThreadsVar() && targetOp && -+ (targetOp.isTargetSPMDLoop() || -+ getSingleNestedOpOfType(targetOp.getRegion()) == *this)) -+ return emitError("num_threads argument expected to be attached to parent " -+ "omp.target operation instead"); -+ } -+ - if (failed(verifyPrivateVarList(*this))) - return failure(); - -@@ -1558,23 +1709,23 @@ +@@ -1798,24 +1961,16 @@ // Check parent region // TODO If nested inside of a target region, also check that it does not // contain any statements, declarations or directives other than this @@ -12967,24 +12686,138 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Dialect/OpenMP/IR/OpenM - if 
(numTeamsLowerBound.getType() != numTeamsUpperBound.getType()) - return emitError( - "expected num_teams upper bound and lower bound to be the same type"); -+ auto offloadModOp = -+ llvm::cast(*(*this)->getParentOfType()); -+ if (targetOp && !offloadModOp.getIsTargetDevice()) { -+ if (getNumTeamsLower() || getNumTeamsUpper() || getThreadLimit()) -+ return emitError("num_teams and thread_limit arguments expected to be " -+ "attached to parent omp.target operation"); -+ } else { -+ if (failed(verifyNumTeamsClause(*this, getNumTeamsLower(), -+ getNumTeamsUpper()))) -+ return failure(); - } +- } ++ if (failed( ++ verifyNumTeamsClause(*this, getNumTeamsLower(), getNumTeamsUpper()))) ++ return failure(); // Check for allocate clause restrictions -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp llvm-project/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp ---- llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp 2024-06-12 10:43:15.324182065 -0500 -+++ llvm-project/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp 2024-06-12 10:44:09.359614154 -0500 -@@ -264,6 +264,143 @@ - llvm_unreachable("Unknown ClauseProcBindKind kind"); + if (getAllocateVars().size() != getAllocatorVars().size()) +@@ -2120,6 +2275,7 @@ + if (!isComposite()) + return emitError() + << "'omp.composite' attribute missing from composite wrapper"; ++ + // Check for the allowed leaf constructs that may appear in a composite + // construct directly after DISTRIBUTE. + if (isa(nested)) { +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp llvm-project-aso/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +--- llvm-project-aso-orig/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp 2024-11-23 20:25:27.499272806 -0600 ++++ llvm-project-aso/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp 2024-11-23 20:39:47.200175294 -0600 +@@ -32,6 +32,7 @@ + #include "llvm/IR/ReplaceConstant.h" + #include "llvm/Support/FileSystem.h" + #include "llvm/TargetParser/Triple.h" ++#include "llvm/Transforms/Utils/BasicBlockUtils.h" + #include "llvm/Transforms/Utils/ModuleUtils.h" + + #include +@@ -166,6 +167,10 @@ + if (op.getDevice()) + result = todo("device"); + }; ++ auto checkDistSchedule = [&todo](auto op, LogicalResult &result) { ++ if (op.getDistScheduleStatic() || op.getDistScheduleChunkSize()) ++ result = todo("dist_schedule"); ++ }; + auto checkHasDeviceAddr = [&todo](auto op, LogicalResult &result) { + if (!op.getHasDeviceAddrVars().empty()) + result = todo("has_device_addr"); +@@ -174,10 +179,6 @@ + if (op.getHint()) + op.emitWarning("hint clause discarded"); + }; +- auto checkIf = [&todo](auto op, LogicalResult &result) { +- if (op.getIfExpr()) +- result = todo("if"); +- }; + auto checkInReduction = [&todo](auto op, LogicalResult &result) { + if (!op.getInReductionVars().empty() || op.getInReductionByref() || + op.getInReductionSyms()) +@@ -224,10 +225,6 @@ + op.getReductionSyms()) + result = todo("reduction"); + }; +- auto checkThreadLimit = [&todo](auto op, LogicalResult &result) { +- if (op.getThreadLimit()) +- result = todo("thread_limit"); +- }; + auto checkTaskReduction = [&todo](auto op, LogicalResult &result) { + if (!op.getTaskReductionVars().empty() || op.getTaskReductionByref() || + op.getTaskReductionSyms()) +@@ -252,7 +249,6 @@ + .Case([&](omp::TeamsOp op) { + 
checkAllocate(op, result); + checkPrivate(op, result); +- checkReduction(op, result); + }) + .Case([&](omp::TaskOp op) { + checkAllocate(op, result); +@@ -287,11 +283,16 @@ + omp::AtomicCaptureOp>([&](auto op) { checkHint(op, result); }) + .Case( + [&](auto op) { checkDepend(op, result); }) ++ .Case([&](omp::DistributeOp op) { ++ checkAllocate(op, result); ++ checkDistSchedule(op, result); ++ checkOrder(op, result); ++ checkPrivate(op, result); ++ }) + .Case([&](omp::TargetOp op) { + checkAllocate(op, result); + checkDevice(op, result); + checkHasDeviceAddr(op, result); +- checkIf(op, result); + checkInReduction(op, result); + checkIsDevicePtr(op, result); + // Privatization clauses are supported, except on some situations, so we +@@ -311,7 +312,6 @@ + "structures in omp.target operation"); + } + } +- checkThreadLimit(op, result); + }) + .Default([](Operation &) { + // Assume all clauses for an operation can be translated unless they are +@@ -391,6 +391,8 @@ + Region ®ion, StringRef blockName, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation, + SmallVectorImpl *continuationBlockPHIs = nullptr) { ++ bool isLoopWrapper = isa(region.getParentOp()); ++ + llvm::BasicBlock *continuationBlock = + splitBB(builder, true, "omp.region.cont"); + llvm::BasicBlock *sourceBlock = builder.GetInsertBlock(); +@@ -412,7 +414,12 @@ + bool operandsProcessed = false; + unsigned numYields = 0; + for (Block &bb : region.getBlocks()) { +- if (omp::YieldOp yield = dyn_cast(bb.getTerminator())) { ++ // Prevent loop wrappers from crashing, as they have no terminators. ++ if (isLoopWrapper) ++ continue; ++ ++ if (omp::YieldOp yield = ++ dyn_cast_if_present(bb.getTerminator())) { + if (!operandsProcessed) { + for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) { + continuationBlockPHITypes.push_back( +@@ -468,6 +475,13 @@ + moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) + return llvm::make_error(); + ++ // Create branch here for loop wrappers to prevent their lack of a ++ // terminator from causing a crash below. ++ if (isLoopWrapper) { ++ builder.CreateBr(continuationBlock); ++ continue; ++ } ++ + // Special handling for `omp.yield` and `omp.terminator` (we may have more + // than one): they return the control to the parent OpenMP dialect operation + // so replace them with the branch to the continuation block. We handle this +@@ -569,6 +583,150 @@ + return success(); } +/// Populate a set of previously created llvm.alloca instructions that are only @@ -13055,8 +12888,8 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + // relying on captured variables. + SmallVector loopInfos; + SmallVector bodyInsertPoints; -+ LogicalResult bodyGenStatus = success(); -+ auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) { ++ auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, ++ llvm::Value *iv) -> llvm::Error { + // Make sure further conversions know about the induction variable. + moduleTranslation.mapValue( + loopOp.getRegion().front().getArgument(loopInfos.size()), iv); @@ -13067,7 +12900,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + bodyInsertPoints.push_back(ip); + + if (loopInfos.size() != loopOp.getNumLoops() - 1) -+ return; ++ return llvm::Error::success(); + + // Convert the body of the loop, adding lifetime markers to allocations that + // can be sunk into the new block. 
@@ -13076,14 +12909,16 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + unsigned size = alloca->getAllocatedType()->getPrimitiveSizeInBits() / 8; + builder.CreateLifetimeStart(alloca, builder.getInt64(size)); + } -+ llvm::BasicBlock *cont = -+ convertOmpOpRegions(loopOp.getRegion(), blockName, builder, -+ moduleTranslation, bodyGenStatus); -+ builder.SetInsertPoint(cont, cont->begin()); ++ llvm::Expected cont = convertOmpOpRegions( ++ loopOp.getRegion(), blockName, builder, moduleTranslation); ++ if (!cont) ++ return cont.takeError(); ++ builder.SetInsertPoint(*cont, (*cont)->begin()); + for (auto *alloca : allocasToSink) { + unsigned size = alloca->getAllocatedType()->getPrimitiveSizeInBits() / 8; + builder.CreateLifetimeEnd(alloca, builder.getInt64(size)); + } ++ return llvm::Error::success(); + }; + + // Delegate actual loop construction to the OpenMP IRBuilder. @@ -13093,10 +12928,10 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + // cases. + for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) { + llvm::Value *lowerBound = -+ moduleTranslation.lookupValue(loopOp.getLowerBound()[i]); ++ moduleTranslation.lookupValue(loopOp.getLoopLowerBounds()[i]); + llvm::Value *upperBound = -+ moduleTranslation.lookupValue(loopOp.getUpperBound()[i]); -+ llvm::Value *step = moduleTranslation.lookupValue(loopOp.getStep()[i]); ++ moduleTranslation.lookupValue(loopOp.getLoopUpperBounds()[i]); ++ llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[i]); + + // Make sure loop trip count are emitted in the preheader of the outermost + // loop at the latest so that they are all available for the new collapsed @@ -13104,15 +12939,20 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc; + llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP; + if (i != 0) { -+ loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back()); ++ loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(), ++ ompLoc.DL); + computeIP = loopInfos.front()->getPreheaderIP(); + } -+ loopInfos.push_back(ompBuilder->createCanonicalLoop( -+ loc, bodyGen, lowerBound, upperBound, step, -+ /*IsSigned=*/true, loopOp.getInclusive(), computeIP)); + -+ if (failed(bodyGenStatus)) ++ llvm::Expected loopResult = ++ ompBuilder->createCanonicalLoop( ++ loc, bodyGen, lowerBound, upperBound, step, ++ /*IsSigned=*/true, /*InclusiveStop=*/true, computeIP); ++ ++ if (failed(handleError(loopResult, *loopOp))) + return std::nullopt; ++ ++ loopInfos.push_back(*loopResult); + } + + // Collapse loops. Store the insertion point because LoopInfos may get @@ -13124,96 +12964,188 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + return std::make_tuple(ompLoc, afterIP, loopInfo); +} + - /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder. + /// Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder. 
static LogicalResult - convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, -@@ -676,9 +813,11 @@ - if (Value ifExprVar = op.getIfExpr()) - ifExpr = moduleTranslation.lookupValue(ifExprVar); + convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder, +@@ -992,19 +1150,37 @@ + // variable allocated in the inlined region) + llvm::Value *var = builder.CreateAlloca( + moduleTranslation.convertType(reductionDecls[i].getType())); +- deferredStores.emplace_back(phis[0], var); ++ var->setName("private_redvar"); + +- privateReductionVariables[i] = var; +- moduleTranslation.mapValue(reductionArgs[i], phis[0]); +- reductionVariableMap.try_emplace(loop.getReductionVars()[i], phis[0]); ++ llvm::Type *ptrTy = llvm::PointerType::getUnqual(builder.getContext()); ++ llvm::Value *castVar = ++ builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy); ++ // TODO: I (Sergio) just guessed casting phis[0] like it's done for var is ++ // what's supposed to happen with this code coming from a merge from main, ++ // but I don't actually know. Someone more familiar with it needs to check ++ // this. ++ llvm::Value *castPhi = ++ builder.CreatePointerBitCastOrAddrSpaceCast(phis[0], ptrTy); ++ ++ deferredStores.emplace_back(castPhi, castVar); ++ ++ privateReductionVariables[i] = castVar; ++ moduleTranslation.mapValue(reductionArgs[i], castPhi); ++ reductionVariableMap.try_emplace(loop.getReductionVars()[i], castPhi); + } else { + assert(allocRegion.empty() && + "allocaction is implicit for by-val reduction"); + llvm::Value *var = builder.CreateAlloca( + moduleTranslation.convertType(reductionDecls[i].getType())); +- moduleTranslation.mapValue(reductionArgs[i], var); +- privateReductionVariables[i] = var; +- reductionVariableMap.try_emplace(loop.getReductionVars()[i], var); ++ var->setName("private_redvar"); ++ ++ llvm::Type *ptrTy = llvm::PointerType::getUnqual(builder.getContext()); ++ llvm::Value *castVar = ++ builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy); ++ ++ moduleTranslation.mapValue(reductionArgs[i], castVar); ++ privateReductionVariables[i] = castVar; ++ reductionVariableMap.try_emplace(loop.getReductionVars()[i], castVar); + } + } -+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); - llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); -- builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTeams( -+ builder.restoreIP(ompBuilder->createTeams( - ompLoc, bodyCB, numTeamsLower, numTeamsUpper, threadLimit, ifExpr)); +@@ -1117,18 +1293,20 @@ + LLVM::ModuleTranslation &moduleTranslation, + llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, + SmallVectorImpl &reductionDecls, +- ArrayRef privateReductionVariables, ArrayRef isByRef) { ++ ArrayRef privateReductionVariables, ArrayRef isByRef, ++ bool isNowait = false, bool isTeamsReduction = false) { + // Process the reductions if required. + if (op.getNumReductionVars() == 0) + return success(); + ++ SmallVector owningReductionGens; ++ SmallVector owningAtomicReductionGens; ++ SmallVector reductionInfos; + - return bodyGenStatus; + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + + // Create the reduction generators. We need to own them here because + // ReductionInfo only accepts references to the generators. 
+- SmallVector owningReductionGens; +- SmallVector owningAtomicReductionGens; +- SmallVector reductionInfos; + collectReductionInfo(op, builder, moduleTranslation, reductionDecls, + owningReductionGens, owningAtomicReductionGens, + privateReductionVariables, reductionInfos); +@@ -1140,7 +1318,7 @@ + builder.SetInsertPoint(tempTerminator); + llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint = + ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos, +- isByRef, op.getNowait()); ++ isByRef, isNowait, isTeamsReduction); + + if (failed(handleError(contInsertPoint, *op))) + return failure(); +@@ -1166,7 +1344,6 @@ + return inlineOmpRegionCleanup(reductionRegions, privateReductionVariables, + moduleTranslation, builder, + "omp.reduction.cleanup"); +- return success(); } -@@ -779,9 +918,15 @@ - continue; - llvm::Value *var = builder.CreateAlloca( - moduleTranslation.convertType(reductionDecls[i].getType())); -- moduleTranslation.mapValue(args[i], var); -- privateReductionVariables.push_back(var); -- reductionVariableMap.try_emplace(loop.getReductionVars()[i], var); -+ -+ var->setName("private_redvar"); -+ llvm::Type *ptrTy = llvm::PointerType::getUnqual(builder.getContext()); -+ llvm::Value *castVar = -+ builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy); -+ -+ moduleTranslation.mapValue(args[i], castVar); -+ privateReductionVariables.push_back(castVar); -+ reductionVariableMap.try_emplace(loop.getReductionVars()[i], castVar); - } + static ArrayRef getIsByRef(std::optional> attr) { +@@ -1429,9 +1606,9 @@ + builder.restoreIP(*afterIP); + + // Process the reductions if required. +- return createReductionsAndCleanup(sectionsOp, builder, moduleTranslation, +- allocaIP, reductionDecls, +- privateReductionVariables, isByRef); ++ return createReductionsAndCleanup( ++ sectionsOp, builder, moduleTranslation, allocaIP, reductionDecls, ++ privateReductionVariables, isByRef, sectionsOp.getNowait()); } -@@ -815,7 +960,6 @@ - const SmallVector &privateReductionVariables, - SmallVector &reductionInfos) { - unsigned numReductions = loop.getNumReductionVars(); -- - for (unsigned i = 0; i < numReductions; ++i) { - owningReductionGens.push_back( - makeReductionGen(reductionDecls[i], builder, moduleTranslation)); -@@ -825,10 +969,12 @@ - - // Collect the reduction information. - reductionInfos.reserve(numReductions); -+ - for (unsigned i = 0; i < numReductions; ++i) { - llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr; - if (owningAtomicReductionGens[i]) - atomicGen = owningAtomicReductionGens[i]; -+ - llvm::Value *variable = - moduleTranslation.lookupValue(loop.getReductionVars()[i]); - reductionInfos.push_back( -@@ -883,9 +1029,15 @@ + /// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder. 
+@@ -1485,6 +1662,33 @@ + if (failed(checkImplementationStatus(*op))) + return failure(); + ++ llvm::ArrayRef isByRef = getIsByRef(op.getReductionByref()); ++ assert(isByRef.size() == op.getNumReductionVars()); ++ ++ SmallVector reductionDecls; ++ collectReductionDecls(op, reductionDecls); ++ llvm::OpenMPIRBuilder::InsertPointTy allocaIP = ++ findAllocaInsertPoint(builder, moduleTranslation); ++ ++ SmallVector privateReductionVariables( ++ op.getNumReductionVars()); ++ DenseMap reductionVariableMap; ++ ++ MutableArrayRef reductionArgs = ++ llvm::cast(*op).getReductionBlockArgs(); ++ ++ if (failed(allocAndInitializeReductionVars( ++ op, reductionArgs, builder, moduleTranslation, allocaIP, ++ reductionDecls, privateReductionVariables, reductionVariableMap, ++ isByRef))) ++ return failure(); ++ ++ // Store the mapping between reduction variables and their private copies on ++ // ModuleTranslation stack. It can be then recovered when translating ++ // omp.reduce operations in a separate call. ++ LLVM::ModuleTranslation::SaveStack mappingGuard( ++ moduleTranslation, reductionVariableMap); ++ + auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) { + LLVM::ModuleTranslation::SaveStack frame( + moduleTranslation, allocaIP); +@@ -1519,6 +1723,13 @@ + return failure(); + + builder.restoreIP(*afterIP); ++ ++ // Process the reductions if required. ++ return createReductionsAndCleanup( ++ op, builder, moduleTranslation, allocaIP, reductionDecls, ++ privateReductionVariables, isByRef, ++ /*isNoWait*/ false, /*isTeamsReduction*/ true); ++ + return success(); } - /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder. --static LogicalResult --convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, -- LLVM::ModuleTranslation &moduleTranslation) { -+static LogicalResult convertOmpWsloop( -+ Operation &opInst, llvm::IRBuilderBase &builder, -+ LLVM::ModuleTranslation &moduleTranslation, -+ llvm::OpenMPIRBuilder::InsertPointTy redAllocaIP, -+ SmallVector &owningReductionGens, -+ SmallVector &owningAtomicReductionGens, -+ SmallVector &reductionInfos) { +@@ -1713,6 +1924,11 @@ + static LogicalResult + convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { ++ llvm::OpenMPIRBuilder::InsertPointTy redAllocaIP = ++ findAllocaInsertPoint(builder, moduleTranslation); ++ + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + // FIXME: This ignores any other nested wrappers (e.g. omp.simd). 
auto wsloopOp = cast(opInst); - auto loopOp = cast(wsloopOp.getWrappedLoop()); - -@@ -908,12 +1060,10 @@ + if (failed(checkImplementationStatus(opInst))) + return failure(); +@@ -1738,8 +1954,6 @@ SmallVector reductionDecls; collectReductionDecls(wsloopOp, reductionDecls); - llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); - SmallVector privateReductionVariables; - DenseMap reductionVariableMap; -- allocByValReductionVars(wsloopOp, builder, moduleTranslation, allocaIP, -+ allocByValReductionVars(wsloopOp, builder, moduleTranslation, redAllocaIP, - reductionDecls, privateReductionVariables, - reductionVariableMap, isByRef); + SmallVector privateReductionVariables( + wsloopOp.getNumReductionVars()); +@@ -1749,7 +1963,7 @@ + cast(opInst).getReductionBlockArgs(); -@@ -966,6 +1116,9 @@ + if (failed(allocAndInitializeReductionVars( +- wsloopOp, reductionArgs, builder, moduleTranslation, allocaIP, ++ wsloopOp, reductionArgs, builder, moduleTranslation, redAllocaIP, + reductionDecls, privateReductionVariables, reductionVariableMap, + isByRef))) + return failure(); +@@ -1770,6 +1984,9 @@ // Set up the source location value for OpenMP runtime. llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); @@ -13221,42 +13153,49 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + getSinkableAllocas(moduleTranslation, loopOp.getRegion(), allocasToSink); + // Generator of the canonical loop body. - // TODO: support error propagation in OpenMPIRBuilder and use it instead of - // relying on captured variables. -@@ -985,10 +1138,21 @@ + SmallVector loopInfos; + SmallVector bodyInsertPoints; +@@ -1787,11 +2004,27 @@ if (loopInfos.size() != loopOp.getNumLoops() - 1) - return; + return llvm::Error::success(); - // Convert the body of the loop. + // Convert the body of the loop, adding lifetime markers to allocations that + // can be sunk into the new block. builder.restoreIP(ip); -- convertOmpOpRegions(loopOp.getRegion(), "omp.wsloop.region", builder, -- moduleTranslation, bodyGenStatus); +- return convertOmpOpRegions(loopOp.getRegion(), "omp.wsloop.region", builder, +- moduleTranslation) +- .takeError(); + for (auto *alloca : allocasToSink) { + unsigned size = alloca->getAllocatedType()->getPrimitiveSizeInBits() / 8; + builder.CreateLifetimeStart(alloca, builder.getInt64(size)); + } -+ llvm::BasicBlock *cont = ++ ++ llvm::Expected cont = + convertOmpOpRegions(loopOp.getRegion(), "omp.wsloop.region", builder, -+ moduleTranslation, bodyGenStatus); -+ builder.SetInsertPoint(cont, cont->begin()); ++ moduleTranslation); ++ if (!cont) ++ return cont.takeError(); ++ ++ builder.SetInsertPoint(*cont, (*cont)->begin()); ++ + for (auto *alloca : allocasToSink) { + unsigned size = alloca->getAllocatedType()->getPrimitiveSizeInBits() / 8; + builder.CreateLifetimeEnd(alloca, builder.getInt64(size)); + } ++ return llvm::Error::success(); }; // Delegate actual loop construction to the OpenMP IRBuilder. -@@ -996,7 +1160,6 @@ +@@ -1799,7 +2032,6 @@ // loop, i.e. it has a positive step, uses signed integer semantics. // Reconsider this code when the nested loop operation clearly supports more // cases. 
- llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) { llvm::Value *lowerBound = - moduleTranslation.lookupValue(loopOp.getLowerBound()[i]); -@@ -1027,7 +1190,8 @@ + moduleTranslation.lookupValue(loopOp.getLoopLowerBounds()[i]); +@@ -1834,19 +2066,30 @@ llvm::CanonicalLoopInfo *loopInfo = ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {}); @@ -13265,11 +13204,16 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + findAllocaInsertPoint(builder, moduleTranslation); // TODO: Handle doacross loops when the ordered clause has a parameter. - bool isOrdered = wsloopOp.getOrderedVal().has_value(); -@@ -1035,11 +1199,22 @@ - wsloopOp.getScheduleModifier(); - bool isSimd = wsloopOp.getSimdModifier(); - + bool isOrdered = wsloopOp.getOrdered().has_value(); + std::optional scheduleMod = wsloopOp.getScheduleMod(); + bool isSimd = wsloopOp.getScheduleSimd(); + +- llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP = +- ompBuilder->applyWorkshareLoop( +- ompLoc.DL, loopInfo, allocaIP, !wsloopOp.getNowait(), +- convertToScheduleKind(schedule), chunk, isSimd, +- scheduleMod == omp::ScheduleModifier::monotonic, +- scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered); + bool distributeCodeGen = opInst.getParentOfType(); + bool parallelCodeGen = opInst.getParentOfType(); + llvm::omp::WorksharingLoopType workshareLoopType; @@ -13280,77 +13224,39 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + } else { + workshareLoopType = llvm::omp::WorksharingLoopType::ForStaticLoop; + } - ompBuilder->applyWorkshareLoop( - ompLoc.DL, loopInfo, allocaIP, !wsloopOp.getNowait(), - convertToScheduleKind(schedule), chunk, isSimd, - scheduleModifier == omp::ScheduleModifier::monotonic, -- scheduleModifier == omp::ScheduleModifier::nonmonotonic, isOrdered); -+ scheduleModifier == omp::ScheduleModifier::nonmonotonic, isOrdered, ++ llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP = ompBuilder->applyWorkshareLoop( ++ ompLoc.DL, loopInfo, allocaIP, !wsloopOp.getNowait(), ++ convertToScheduleKind(schedule), chunk, isSimd, ++ scheduleMod == omp::ScheduleModifier::monotonic, ++ scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered, + workshareLoopType); - // Continue building IR after the loop. Note that the LoopInfo returned by - // `collapseLoops` points inside the outermost loop and is intended for -@@ -1053,21 +1228,20 @@ - - // Create the reduction generators. We need to own them here because - // ReductionInfo only accepts references to the generators. -- SmallVector owningReductionGens; -- SmallVector owningAtomicReductionGens; -- SmallVector reductionInfos; - collectReductionInfo(wsloopOp, builder, moduleTranslation, reductionDecls, - owningReductionGens, owningAtomicReductionGens, - privateReductionVariables, reductionInfos); -- - // The call to createReductions below expects the block to have a - // terminator. Create an unreachable instruction to serve as terminator - // and remove it later. 
- llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable(); - builder.SetInsertPoint(tempTerminator); -+ - llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint = - ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos, -- isByRef, wsloopOp.getNowait()); -+ isByRef, wsloopOp.getNowait(), -+ /*IsTeamsReduction=*/false, -+ /*HasDistribute=*/distributeCodeGen); - if (!contInsertPoint.getBlock()) - return wsloopOp->emitOpError() << "failed to convert reductions"; - auto nextInsertionPoint = -@@ -1086,6 +1260,20 @@ - "omp.reduction.cleanup"); + if (failed(handleError(wsloopIP, opInst))) + return failure(); +@@ -1858,9 +2101,10 @@ + builder.restoreIP(afterIP); + + // Process the reductions if required. +- return createReductionsAndCleanup(wsloopOp, builder, moduleTranslation, +- allocaIP, reductionDecls, +- privateReductionVariables, isByRef); ++ return createReductionsAndCleanup( ++ wsloopOp, builder, moduleTranslation, allocaIP, reductionDecls, ++ privateReductionVariables, isByRef, wsloopOp.getNowait(), ++ /*isTeamsReduction=*/false); } -+static LogicalResult -+convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, -+ LLVM::ModuleTranslation &moduleTranslation) { -+ llvm::OpenMPIRBuilder::InsertPointTy redAllocaIP = -+ findAllocaInsertPoint(builder, moduleTranslation); -+ SmallVector owningReductionGens; -+ SmallVector owningAtomicReductionGens; -+ SmallVector reductionInfos; -+ -+ return convertOmpWsloop(opInst, builder, moduleTranslation, redAllocaIP, -+ owningReductionGens, owningAtomicReductionGens, -+ reductionInfos); -+} -+ - /// A RAII class that on construction replaces the region arguments of the - /// parallel op (which correspond to private variables) with the actual private - /// variables they correspond to. This prepares the parallel op so that it -@@ -1224,10 +1412,10 @@ - // Generate reductions from info - llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable(); - builder.SetInsertPoint(tempTerminator); -- - llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint = + /// Converts the OpenMP parallel operation to LLVM IR. +@@ -2072,7 +2316,7 @@ + + llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint = ompBuilder->createReductions(builder.saveIP(), allocaIP, - reductionInfos, isByRef, false); -+ reductionInfos, isByRef, false, false, -+ false); - if (!contInsertPoint.getBlock()) { - bodyGenStatus = opInst->emitOpError() << "failed to convert reductions"; - return; -@@ -1409,71 +1597,16 @@ ++ reductionInfos, isByRef, false, false); + if (!contInsertPoint) + return contInsertPoint.takeError(); + +@@ -2171,77 +2415,19 @@ static LogicalResult convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { @@ -13358,15 +13264,16 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O auto simdOp = cast(opInst); auto loopOp = cast(simdOp.getWrappedLoop()); + if (failed(checkImplementationStatus(opInst))) + return failure(); + - llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); - - // Generator of the canonical loop body. -- // TODO: support error propagation in OpenMPIRBuilder and use it instead of -- // relying on captured variables. 
- SmallVector loopInfos; - SmallVector bodyInsertPoints; -- LogicalResult bodyGenStatus = success(); -- auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) { +- auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, +- llvm::Value *iv) -> llvm::Error { - // Make sure further conversions know about the induction variable. - moduleTranslation.mapValue( - loopOp.getRegion().front().getArgument(loopInfos.size()), iv); @@ -13377,12 +13284,13 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O - bodyInsertPoints.push_back(ip); - - if (loopInfos.size() != loopOp.getNumLoops() - 1) -- return; +- return llvm::Error::success(); - - // Convert the body of the loop. - builder.restoreIP(ip); -- convertOmpOpRegions(loopOp.getRegion(), "omp.simd.region", builder, -- moduleTranslation, bodyGenStatus); +- return convertOmpOpRegions(loopOp.getRegion(), "omp.simd.region", builder, +- moduleTranslation) +- .takeError(); - }; - - // Delegate actual loop construction to the OpenMP IRBuilder. @@ -13393,10 +13301,10 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O - llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); - for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) { - llvm::Value *lowerBound = -- moduleTranslation.lookupValue(loopOp.getLowerBound()[i]); +- moduleTranslation.lookupValue(loopOp.getLoopLowerBounds()[i]); - llvm::Value *upperBound = -- moduleTranslation.lookupValue(loopOp.getUpperBound()[i]); -- llvm::Value *step = moduleTranslation.lookupValue(loopOp.getStep()[i]); +- moduleTranslation.lookupValue(loopOp.getLoopUpperBounds()[i]); +- llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[i]); - - // Make sure loop trip count are emitted in the preheader of the outermost - // loop at the latest so that they are all available for the new collapsed @@ -13408,18 +13316,22 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O - ompLoc.DL); - computeIP = loopInfos.front()->getPreheaderIP(); - } -- loopInfos.push_back(ompBuilder->createCanonicalLoop( -- loc, bodyGen, lowerBound, upperBound, step, -- /*IsSigned=*/true, /*Inclusive=*/true, computeIP)); +- +- llvm::Expected loopResult = +- ompBuilder->createCanonicalLoop( +- loc, bodyGen, lowerBound, upperBound, step, +- /*IsSigned=*/true, /*InclusiveStop=*/true, computeIP); +- +- if (failed(handleError(loopResult, *loopOp))) +- return failure(); +- +- loopInfos.push_back(*loopResult); +- } + auto loopNestConversionResult = convertLoopNestHelper( + *loopOp, builder, moduleTranslation, "omp.simd.region"); + if (!loopNestConversionResult) + return failure(); -- if (failed(bodyGenStatus)) -- return failure(); -- } -- - // Collapse loops. - llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP(); - llvm::CanonicalLoopInfo *loopInfo = @@ -13428,7 +13340,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O llvm::ConstantInt *simdlen = nullptr; if (std::optional simdlenVar = simdOp.getSimdlen()) -@@ -1962,7 +2095,8 @@ +@@ -2751,7 +2937,8 @@ // bytes from the extent (ub - lb) * sizeInBytes. NOTE: This may need // some adjustment for members with more complex types. 
return builder.CreateMul(elementCount, @@ -13438,60 +13350,80 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O } } -@@ -2725,8 +2859,12 @@ - argIndex++; - } - -- bodyGenStatus = inlineConvertOmpRegions(region, "omp.data.region", -- builder, moduleTranslation); +@@ -3175,7 +3362,7 @@ + + combinedInfo.Types.emplace_back(mapFlag); + combinedInfo.DevicePointers.emplace_back( +- mapData.DevicePointers[memberDataIdx]); ++ llvm::OpenMPIRBuilder::DeviceInfoTy::None); + combinedInfo.Names.emplace_back( + LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder)); + uint64_t basePointerIndex = +@@ -3557,9 +3744,14 @@ + return info.DevicePtrInfoMap[basePointer].second; + }); + +- if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder, +- moduleTranslation))) +- return llvm::make_error(); + SmallVector phis; -+ llvm::BasicBlock *continuationBlock = ++ llvm::Expected continuationBlock = + convertOmpOpRegions(region, "omp.data.region", builder, -+ moduleTranslation, bodyGenStatus, &phis); -+ builder.SetInsertPoint(continuationBlock, -+ continuationBlock->getFirstInsertionPt()); ++ moduleTranslation, &phis); ++ if (!continuationBlock) ++ return continuationBlock.takeError(); ++ builder.SetInsertPoint(*continuationBlock, ++ (*continuationBlock)->getFirstInsertionPt()); } break; case BodyGenTy::DupNoPriv: -@@ -2735,8 +2873,12 @@ +@@ -3568,6 +3760,7 @@ // If device info is available then region has already been generated if (info.DevicePtrInfoMap.empty()) { builder.restoreIP(codeGenIP); -- bodyGenStatus = inlineConvertOmpRegions(region, "omp.data.region", -- builder, moduleTranslation); ++ + // For device pass, if use_device_ptr(addr) mappings were present, + // we need to link them here before codegen. + if (ompBuilder->Config.IsTargetDevice.value_or(false)) { +@@ -3579,9 +3772,14 @@ + useDevicePtrVars, mapData); + } + +- if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder, +- moduleTranslation))) +- return llvm::make_error(); + SmallVector phis; -+ llvm::BasicBlock *continuationBlock = ++ llvm::Expected continuationBlock = + convertOmpOpRegions(region, "omp.data.region", builder, -+ moduleTranslation, bodyGenStatus, &phis); -+ builder.SetInsertPoint(continuationBlock, -+ continuationBlock->getFirstInsertionPt()); ++ moduleTranslation, &phis); ++ if (!continuationBlock) ++ return continuationBlock.takeError(); ++ builder.SetInsertPoint(*continuationBlock, ++ (*continuationBlock)->getFirstInsertionPt()); } break; } -@@ -2759,6 +2901,90 @@ - return bodyGenStatus; +@@ -3608,6 +3806,64 @@ + return success(); } -+static LogicalResult convertOmpDistribute( -+ Operation &opInst, llvm::IRBuilderBase &builder, -+ LLVM::ModuleTranslation &moduleTranslation, -+ llvm::OpenMPIRBuilder::InsertPointTy *redAllocaIP, -+ SmallVector &reductionInfos) { ++static LogicalResult ++convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder, ++ LLVM::ModuleTranslation &moduleTranslation) { + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); -+ // FIXME: This ignores any other nested wrappers (e.g. omp.parallel + -+ // omp.wsloop, omp.simd). ++ // FIXME: This ignores any other nested wrappers (e.g. omp.wsloop, omp.simd). 
+ auto distributeOp = cast(opInst); ++ if (failed(checkImplementationStatus(opInst))) ++ return failure(); ++ + auto loopOp = cast(distributeOp.getWrappedLoop()); + + SmallVector loopWrappers; + loopOp.gatherWrappers(loopWrappers); + + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; -+ // TODO: support error propagation in OpenMPIRBuilder and use it instead of -+ // relying on captured variables. -+ LogicalResult bodyGenStatus = success(); -+ -+ auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { ++ auto bodyGenCB = [&](InsertPointTy allocaIP, ++ InsertPointTy codeGenIP) -> llvm::Error { + // Save the alloca insertion point on ModuleTranslation stack for use in + // nested regions. + LLVM::ModuleTranslation::SaveStack frame( @@ -13499,79 +13431,44 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + + // DistributeOp has only one region associated with it. + builder.restoreIP(codeGenIP); -+ *redAllocaIP = allocaIP; + + if (loopWrappers.size() == 1) { + // Convert a standalone DISTRIBUTE construct. + auto loopNestConversionResult = convertLoopNestHelper( + *loopOp, builder, moduleTranslation, "omp.distribute.region"); + if (!loopNestConversionResult) -+ return; // TODO: Signal error to abort translation. ++ return llvm::make_error(); + + builder.restoreIP(std::get(*loopNestConversionResult)); + } else { + // Convert a DISTRIBUTE leaf as part of a composite construct. + mlir::Region ® = distributeOp.getRegion(); -+ auto *regionBlock = -+ convertOmpOpRegions(reg, "omp.distribute.region", builder, -+ moduleTranslation, bodyGenStatus); -+ -+ builder.SetInsertPoint(regionBlock->getTerminator()); -+ } -+ -+ // FIXME(JAN): We need to know if we are inside a distribute and -+ // if there is an inner wsloop reduction, in that case we need to -+ // generate the teams reduction bits to combine everything correctly. We -+ // will try to collect the reduction info from the inner wsloop and use -+ // that instead of the reduction clause that could have been on the -+ // omp.parallel -+ auto IP = builder.saveIP(); -+ if (ompBuilder->Config.isGPU()) { -+ // TODO: Consider passing the isByref array together with reductionInfos -+ // if it needs to match nested parallel-do or simd. -+ SmallVector isByref(reductionInfos.size(), true); -+ llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint = -+ ompBuilder->createReductions(IP, allocaIP, reductionInfos, isByref, -+ /*IsNoWait=*/false, -+ /*IsTeamsReduction=*/true); -+ builder.restoreIP(contInsertPoint); ++ llvm::Expected regionBlock = convertOmpOpRegions( ++ reg, "omp.distribute.region", builder, moduleTranslation); ++ if (!regionBlock) ++ return regionBlock.takeError(); ++ builder.SetInsertPoint((*regionBlock)->getTerminator()); + } ++ return llvm::Error::success(); + }; + + llvm::OpenMPIRBuilder::InsertPointTy allocaIP = + findAllocaInsertPoint(builder, moduleTranslation); + llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); -+ builder.restoreIP(ompBuilder->createDistribute(ompLoc, allocaIP, bodyGenCB)); ++ llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = ++ ompBuilder->createDistribute(ompLoc, allocaIP, bodyGenCB); + -+ return success(); -+} ++ if (!afterIP) ++ return opInst.emitError(llvm::toString(afterIP.takeError())); ++ builder.restoreIP(*afterIP); + -+static LogicalResult -+convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder, -+ LLVM::ModuleTranslation &moduleTranslation) { -+ // No reductions are present so we just create dummy variables. 
-+ llvm::OpenMPIRBuilder::InsertPointTy dummyRedAllocaIP; -+ SmallVector dummyReductionInfos; -+ return convertOmpDistribute(opInst, builder, moduleTranslation, -+ &dummyRedAllocaIP, dummyReductionInfos); ++ return success(); +} + /// Lowers the FlagsAttr which is applied to the module on the device /// pass when offloading, this attribute contains OpenMP RTL globals that can /// be passed as flags to the frontend, otherwise they are set to default -@@ -2831,11 +3057,6 @@ - return false; - } - -- if (targetOp.getThreadLimit()) { -- opInst.emitError("Thread limit clause not yet supported"); -- return false; -- } -- - if (targetOp.getNowait()) { - opInst.emitError("Nowait clause not yet supported"); - return false; -@@ -2950,7 +3171,7 @@ +@@ -3783,7 +4039,7 @@ ompBuilder.M.getDataLayout().getProgramAddressSpace(); // Create the alloca for the argument the current point. @@ -13579,8 +13476,8 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + llvm::Value *v = builder.CreateAlloca(arg.getType(), allocaAS, nullptr); if (allocaAS != defaultAS && arg.getType()->isPointerTy()) - v = builder.CreatePointerBitCastOrAddrSpaceCast( -@@ -2980,6 +3201,181 @@ + v = builder.CreateAddrSpaceCast(v, builder.getPtrTy(defaultAS)); +@@ -3814,6 +4070,301 @@ return builder.saveIP(); } @@ -13598,6 +13495,95 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + return op->getParentOfType(); +} + ++static uint64_t getTypeByteSize(mlir::Type type, DataLayout dl) { ++ uint64_t sizeInBits = dl.getTypeSizeInBits(type); ++ uint64_t sizeInBytes = sizeInBits / 8; ++ return sizeInBytes; ++} ++ ++template ++static uint64_t getReductionDataSize(OpTy &op) { ++ if (op.getNumReductionVars() > 0) { ++ assert(op.getNumReductionVars() && ++ "Only 1 reduction variable currently supported"); ++ mlir::Type reductionVarTy = op.getReductionVars()[0].getType(); ++ Operation *opp = op.getOperation(); ++ DataLayout dl = DataLayout(opp->getParentOfType()); ++ return getTypeByteSize(reductionVarTy, dl); ++ } ++ return 0; ++} ++ ++static uint64_t getTeamsReductionDataSize(mlir::omp::TeamsOp &teamsOp) { ++ return getReductionDataSize(teamsOp); ++} ++ ++/// Follow uses of `host_eval`-defined block arguments of the given `omp.target` ++/// operation and populate output variables with their corresponding host value ++/// (i.e. operand evaluated outside of the target region), based on their uses ++/// inside of the target region. ++/// ++/// Loop bounds and steps are only optionally populated, if output vectors are ++/// provided. 
++static void ++extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads, ++ Value &numTeamsLower, Value &numTeamsUpper, ++ Value &threadLimit, ++ llvm::SmallVectorImpl *lowerBounds = nullptr, ++ llvm::SmallVectorImpl *upperBounds = nullptr, ++ llvm::SmallVectorImpl *steps = nullptr) { ++ auto blockArgIface = llvm::cast(*targetOp); ++ for (auto item : llvm::zip_equal(targetOp.getHostEvalVars(), ++ blockArgIface.getHostEvalBlockArgs())) { ++ Value hostEvalVar = std::get<0>(item), blockArg = std::get<1>(item); ++ ++ for (Operation *user : blockArg.getUsers()) { ++ llvm::TypeSwitch(user) ++ .Case([&](omp::TeamsOp teamsOp) { ++ if (teamsOp.getNumTeamsLower() == blockArg) ++ numTeamsLower = hostEvalVar; ++ else if (teamsOp.getNumTeamsUpper() == blockArg) ++ numTeamsUpper = hostEvalVar; ++ else if (teamsOp.getThreadLimit() == blockArg) ++ threadLimit = hostEvalVar; ++ else ++ llvm_unreachable("unsupported host_eval use"); ++ }) ++ .Case([&](omp::ParallelOp parallelOp) { ++ if (parallelOp.getNumThreads() == blockArg) ++ numThreads = hostEvalVar; ++ else ++ llvm_unreachable("unsupported host_eval use"); ++ }) ++ .Case([&](omp::LoopNestOp loopOp) { ++ auto processBounds = ++ [&](OperandRange opBounds, ++ llvm::SmallVectorImpl *outBounds) -> bool { ++ bool found = false; ++ for (auto [i, lb] : llvm::enumerate(opBounds)) { ++ if (lb == blockArg) { ++ found = true; ++ if (outBounds) ++ (*outBounds)[i] = hostEvalVar; ++ } ++ } ++ return found; ++ }; ++ bool found = ++ processBounds(loopOp.getLoopLowerBounds(), lowerBounds); ++ found = processBounds(loopOp.getLoopUpperBounds(), upperBounds) || ++ found; ++ found = processBounds(loopOp.getLoopSteps(), steps) || found; ++ if (!found) ++ llvm_unreachable("unsupported host_eval use"); ++ }) ++ .Default([](Operation *) { ++ llvm_unreachable("unsupported host_eval use"); ++ }); ++ } ++ } ++} ++ +/// Populate default `MinTeams`, `MaxTeams` and `MaxThreads` to their default +/// values as stated by the corresponding clauses, if constant. +/// @@ -13608,29 +13594,52 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + omp::TargetOp targetOp, + llvm::OpenMPIRBuilder::TargetKernelDefaultBounds &bounds, + bool isTargetDevice, bool isGPU) { -+ // TODO Handle constant IF clauses -+ Operation *innermostCapturedOmpOp = targetOp.getInnermostCapturedOmpOp(); ++ // TODO: Handle constant 'if' clauses. ++ Operation *capturedOp = targetOp.getInnermostCapturedOmpOp(); ++ ++ // Extract values for host-evaluated clauses. ++ Value numThreads, numTeamsLower, numTeamsUpper, threadLimit; ++ if (!isTargetDevice) { ++ extractHostEvalClauses(targetOp, numThreads, numTeamsLower, numTeamsUpper, ++ threadLimit); ++ } else { ++ // In the target device, values for these clauses are not passed as ++ // host_eval, but instead evaluated prior to entry to the region. This ++ // ensures values are mapped and available inside of the target region. ++ if (auto teamsOp = castOrGetParentOfType(capturedOp)) { ++ numTeamsLower = teamsOp.getNumTeamsLower(); ++ numTeamsUpper = teamsOp.getNumTeamsUpper(); ++ threadLimit = teamsOp.getThreadLimit(); ++ } ++ ++ if (auto parallelOp = castOrGetParentOfType(capturedOp)) ++ numThreads = parallelOp.getNumThreads(); ++ } ++ ++ auto extractConstInteger = [](Value value) -> std::optional { ++ if (auto constOp = ++ dyn_cast_if_present(value.getDefiningOp())) ++ if (auto constAttr = dyn_cast(constOp.getValue())) ++ return constAttr.getInt(); ++ ++ return std::nullopt; ++ }; + + // Handle clauses impacting the number of teams. 
++
+  int32_t minTeamsVal = 1, maxTeamsVal = -1;
-+  if (auto teamsOp =
-+          castOrGetParentOfType<omp::TeamsOp>(innermostCapturedOmpOp)) {
-+    // TODO Use teamsOp.getNumTeamsLower() to initialize `minTeamsVal`. For now,
-+    // just match clang and set min and max to the same value.
-+    Value numTeamsClause = isTargetDevice ? teamsOp.getNumTeamsUpper()
-+                                          : targetOp.getNumTeamsUpper();
-+    if (numTeamsClause) {
-+      if (auto constOp = dyn_cast_if_present<LLVM::ConstantOp>(
-+              numTeamsClause.getDefiningOp())) {
-+        if (auto constAttr = constOp.getValue().dyn_cast<IntegerAttr>())
-+          minTeamsVal = maxTeamsVal = constAttr.getInt();
-+      }
++  if (castOrGetParentOfType<omp::TeamsOp>(capturedOp)) {
++    // TODO: Use `hostNumTeamsLower` to initialize `minTeamsVal`. For now, match
++    // clang and set min and max to the same value.
++    if (numTeamsUpper) {
++      if (auto val = extractConstInteger(numTeamsUpper))
++        minTeamsVal = maxTeamsVal = *val;
+    } else {
+      minTeamsVal = maxTeamsVal = 0;
+    }
-+  } else if (castOrGetParentOfType<omp::ParallelOp>(innermostCapturedOmpOp,
++  } else if (castOrGetParentOfType<omp::ParallelOp>(capturedOp,
+                                     /*immediateParent=*/true) ||
-+             castOrGetParentOfType<omp::SimdOp>(innermostCapturedOmpOp,
++             castOrGetParentOfType<omp::SimdOp>(capturedOp,
+                                     /*immediateParent=*/true)) {
+    minTeamsVal = maxTeamsVal = 1;
+  } else {
@@ -13638,46 +13647,33 @@
+  }
+
+  // Handle clauses impacting the number of threads.
-+  int32_t targetThreadLimitVal = -1;
-+  int32_t teamsThreadLimitVal = -1;
-+  int32_t maxThreadsVal = -1;
+
-+  auto setMaxValueFromClause = [](Value clauseValue, int32_t &result) {
-+    if (clauseValue) {
-+      if (auto constOp = dyn_cast_if_present<LLVM::ConstantOp>(
-+              clauseValue.getDefiningOp())) {
-+        if (auto constAttr = constOp.getValue().dyn_cast<IntegerAttr>())
-+          result = constAttr.getInt();
-+      }
-+      // Found an applicable clause, so it's not undefined. Mark as unknown
-+      // because it's not constant.
-+      if (result < 0)
-+        result = 0;
-+    }
++  auto setMaxValueFromClause = [&extractConstInteger](Value clauseValue,
++                                                      int32_t &result) {
++    if (!clauseValue)
++      return;
++
++    if (auto val = extractConstInteger(clauseValue))
++      result = *val;
++
++    // Found an applicable clause, so it's not undefined. Mark as unknown
++    // because it's not constant.
++    if (result < 0)
++      result = 0;
+  };
+
+  // Extract THREAD_LIMIT clause from TARGET and TEAMS directives.
++  int32_t targetThreadLimitVal = -1, teamsThreadLimitVal = -1;
+  setMaxValueFromClause(targetOp.getThreadLimit(), targetThreadLimitVal);
-+
-+  if (auto teamsOp =
-+          castOrGetParentOfType<omp::TeamsOp>(innermostCapturedOmpOp)) {
-+    Value threadLimitClause = isTargetDevice ? teamsOp.getThreadLimit()
-+                                             : targetOp.getTeamsThreadLimit();
-+    setMaxValueFromClause(threadLimitClause, teamsThreadLimitVal);
-+  }
++  setMaxValueFromClause(threadLimit, teamsThreadLimitVal);
+
+  // Extract MAX_THREADS clause from PARALLEL or set to 1 if it's SIMD.
-+  if (innermostCapturedOmpOp) {
-+    if (auto parallelOp =
-+            castOrGetParentOfType<omp::ParallelOp>(innermostCapturedOmpOp)) {
-+      Value numThreadsClause = isTargetDevice ? parallelOp.getNumThreadsVar()
-+                                              : targetOp.getNumThreads();
-+      setMaxValueFromClause(numThreadsClause, maxThreadsVal);
-+    } else if (castOrGetParentOfType<omp::SimdOp>(innermostCapturedOmpOp,
-+                                                  /*immediateParent=*/true)) {
-+      maxThreadsVal = 1;
-+    }
-+  }
++  int32_t maxThreadsVal = -1;
++  if (castOrGetParentOfType<omp::ParallelOp>(capturedOp))
++    setMaxValueFromClause(numThreads, maxThreadsVal);
++  else if (castOrGetParentOfType<omp::SimdOp>(capturedOp,
++                                              /*immediateParent=*/true))
++    maxThreadsVal = 1;
+
+  // For max values, < 0 means unset, == 0 means set but unknown.
+  // Select the minimum value between MAX_THREADS and THREAD_LIMIT clauses
+  // that were set.
+  int32_t combinedMaxThreadsVal = targetThreadLimitVal;
+  if (combinedMaxThreadsVal < 0 ||
+      (teamsThreadLimitVal >= 0 && teamsThreadLimitVal < combinedMaxThreadsVal))
+    combinedMaxThreadsVal = teamsThreadLimitVal;
+
+  if (combinedMaxThreadsVal < 0 ||
+      (maxThreadsVal >= 0 && maxThreadsVal < combinedMaxThreadsVal))
+    combinedMaxThreadsVal = maxThreadsVal;
+
@@ -13693,41 +13689,17 @@
+  // Calculate reduction data size, limited to single reduction variable
+  // for now.
+  int32_t reductionDataSize = 0;
-+  if (isGPU && innermostCapturedOmpOp) {
-+    if (auto loopNestOp =
-+            mlir::dyn_cast<mlir::omp::LoopNestOp>(innermostCapturedOmpOp)) {
-+      // FIXME: This treats 'DO SIMD' as if it was a 'DO' construct. Reductions
-+      // on other constructs apart from 'DO' aren't considered either.
-+      mlir::omp::WsloopOp wsloopOp = nullptr;
-+      SmallVector<mlir::omp::LoopWrapperInterface> wrappers;
-+      loopNestOp.gatherWrappers(wrappers);
-+      for (auto wrapper : wrappers) {
-+        wsloopOp = mlir::dyn_cast<mlir::omp::WsloopOp>(*wrapper);
-+        if (wsloopOp)
-+          break;
-+      }
-+      if (wsloopOp) {
-+        if (wsloopOp.getNumReductionVars() > 0) {
-+          assert(wsloopOp.getNumReductionVars() &&
-+                 "Only 1 reduction variable currently supported");
-+          mlir::Value reductionVar = wsloopOp.getReductionVars()[0];
-+          DataLayout dl =
-+              DataLayout(innermostCapturedOmpOp->getParentOfType<ModuleOp>());
-+
-+          mlir::Type reductionVarTy = reductionVar.getType();
-+          uint64_t sizeInBits = dl.getTypeSizeInBits(reductionVarTy);
-+          uint64_t sizeInBytes = sizeInBits / 8;
-+          reductionDataSize = sizeInBytes;
-+        }
-+      }
++  if (isGPU && capturedOp) {
++    if (auto teamsOp = castOrGetParentOfType<omp::TeamsOp>(capturedOp)) {
++      reductionDataSize = getTeamsReductionDataSize(teamsOp);
+    }
+  }
+
+  // Update kernel bounds structure for the `OpenMPIRBuilder` to use.
+  bounds.MinTeams = minTeamsVal;
-+  bounds.MaxTeams = maxTeamsVal;
++  bounds.MaxTeams.push_back(maxTeamsVal);
+  bounds.MinThreads = 1;
-+  bounds.MaxThreads = combinedMaxThreadsVal;
++  bounds.MaxThreads.push_back(combinedMaxThreadsVal);
+  bounds.ReductionDataSize = reductionDataSize;
+  if (bounds.ReductionDataSize != 0)
+    bounds.ReductionBufferLength = 1024;
@@ -13740,77 +13712,108 @@
+/// only provide correct results if it's called after the body of \c targetOp
+/// has been fully generated.
+static void initTargetRuntimeBounds(
-+    LLVM::ModuleTranslation &moduleTranslation, omp::TargetOp targetOp,
++    llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation,
++    omp::TargetOp targetOp,
+    llvm::OpenMPIRBuilder::TargetKernelRuntimeBounds &bounds) {
++  omp::LoopNestOp loopOp = castOrGetParentOfType<omp::LoopNestOp>(
++      targetOp.getInnermostCapturedOmpOp());
++  unsigned numLoops = loopOp ? loopOp.getNumLoops() : 0;
++
++  Value numThreads, numTeamsLower, numTeamsUpper, teamsThreadLimit;
++  llvm::SmallVector<Value> lowerBounds(numLoops), upperBounds(numLoops),
++      steps(numLoops);
++  extractHostEvalClauses(targetOp, numThreads, numTeamsLower, numTeamsUpper,
++                         teamsThreadLimit, &lowerBounds, &upperBounds, &steps);
++
+  // TODO Handle IF clauses.
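++  // Illustrative example (mirrors the `omp_target_host_eval` test added to
++  // ops.mlir further below): for a host-evaluated SPMD kernel such as
++  //
++  //   !$omp target teams distribute parallel do
++  //   do i = lb, ub, step
++  //     ...
++  //   end do
++  //
++  // the teams/thread counts and the loop bounds arrive here as host_eval
++  // values, and the trip count is computed on the host for the kernel launch.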
-+  if (Value numTeamsLower = targetOp.getNumTeamsLower())
++  llvm::Value *&llvmTargetThreadLimit =
++      bounds.TargetThreadLimit.emplace_back(nullptr);
++  if (Value targetThreadLimit = targetOp.getThreadLimit())
++    llvmTargetThreadLimit = moduleTranslation.lookupValue(targetThreadLimit);
++
++  if (numTeamsLower)
+    bounds.MinTeams = moduleTranslation.lookupValue(numTeamsLower);
+
-+  if (Value numTeamsUpper = targetOp.getNumTeamsUpper())
-+    bounds.MaxTeams = moduleTranslation.lookupValue(numTeamsUpper);
++  llvm::Value *&llvmMaxTeams = bounds.MaxTeams.emplace_back(nullptr);
++  if (numTeamsUpper)
++    llvmMaxTeams = moduleTranslation.lookupValue(numTeamsUpper);
+
-+  if (Value teamsThreadLimit = targetOp.getTeamsThreadLimit())
-+    bounds.TeamsThreadLimit = moduleTranslation.lookupValue(teamsThreadLimit);
++  llvm::Value *&llvmTeamsThreadLimit =
++      bounds.TeamsThreadLimit.emplace_back(nullptr);
++  if (teamsThreadLimit)
++    llvmTeamsThreadLimit = moduleTranslation.lookupValue(teamsThreadLimit);
+
-+  if (Value numThreads = targetOp.getNumThreads())
++  if (numThreads)
+    bounds.MaxThreads = moduleTranslation.lookupValue(numThreads);
+
-+  if (Value tripCount = targetOp.getTripCount())
-+    bounds.LoopTripCount = moduleTranslation.lookupValue(tripCount);
++  if (targetOp.isTargetSPMDLoop()) {
++    llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
++    bounds.LoopTripCount = nullptr;
++
++    // To calculate the trip count, we multiply together the trip counts of
++    // every collapsed canonical loop. We don't need to create the loop nests
++    // here, since we're only interested in the trip count.
++    for (auto [loopLower, loopUpper, loopStep] :
++         llvm::zip_equal(lowerBounds, upperBounds, steps)) {
++      llvm::Value *lowerBound = moduleTranslation.lookupValue(loopLower);
++      llvm::Value *upperBound = moduleTranslation.lookupValue(loopUpper);
++      llvm::Value *step = moduleTranslation.lookupValue(loopStep);
++
++      llvm::OpenMPIRBuilder::LocationDescription loc(builder);
++      llvm::Value *tripCount = ompBuilder->calculateCanonicalLoopTripCount(
++          loc, lowerBound, upperBound, step, /*IsSigned=*/true,
++          loopOp.getLoopInclusive());
++
++      if (!bounds.LoopTripCount) {
++        bounds.LoopTripCount = tripCount;
++        continue;
++      }
++
++      // TODO: Enable UndefinedSanitizer to diagnose an overflow here.
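++      // The multiplication below is tagged 'nuw' on the assumption that the
++      // combined trip count of the collapsed loop nest fits in the result
++      // type; the TODO above covers diagnosing the overflowing case.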
++ bounds.LoopTripCount = builder.CreateMul(bounds.LoopTripCount, tripCount, ++ {}, /*HasNUW=*/true); ++ } ++ } +} + static LogicalResult convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { -@@ -2987,32 +3383,20 @@ - if (!targetOpSupported(opInst)) - return failure(); +@@ -3823,12 +4374,14 @@ -+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); -+ bool isTargetDevice = ompBuilder->Config.isTargetDevice(); + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + bool isTargetDevice = ompBuilder->Config.isTargetDevice(); + bool isGPU = ompBuilder->Config.isGPU(); ++ auto parentFn = opInst.getParentOfType(); - auto targetOp = cast(opInst); ++ auto blockIface = cast(opInst); auto &targetRegion = targetOp.getRegion(); DataLayout dl = DataLayout(opInst.getParentOfType()); - SmallVector mapOperands = targetOp.getMapOperands(); - -+ llvm::OpenMPIRBuilder::TargetKernelRuntimeBounds runtimeBounds; - LogicalResult bodyGenStatus = success(); - using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; - auto bodyCB = [&](InsertPointTy allocaIP, - InsertPointTy codeGenIP) -> InsertPointTy { -- // Forward target-cpu and target-features function attributes from the -- // original function to the new outlined function. -- llvm::Function *llvmParentFn = -- moduleTranslation.lookupFunction(parentFn.getName()); -- llvm::Function *llvmOutlinedFn = codeGenIP.getBlock()->getParent(); -- assert(llvmParentFn && llvmOutlinedFn && -- "Both parent and outlined functions must exist at this point"); -- -- if (auto attr = llvmParentFn->getFnAttribute("target-cpu"); -- attr.isStringAttribute()) -- llvmOutlinedFn->addFnAttr(attr); -- -- if (auto attr = llvmParentFn->getFnAttribute("target-features"); -- attr.isStringAttribute()) -- llvmOutlinedFn->addFnAttr(attr); -- - builder.restoreIP(codeGenIP); - unsigned argIndex = 0; - for (auto &mapOp : mapOperands) { -@@ -3027,6 +3411,10 @@ - llvm::BasicBlock *exitBlock = convertOmpOpRegions( - targetRegion, "omp.target", builder, moduleTranslation, bodyGenStatus); - builder.SetInsertPoint(exitBlock); -+ -+ if (!isTargetDevice) -+ initTargetRuntimeBounds(moduleTranslation, targetOp, runtimeBounds); + SmallVector mapVars = targetOp.getMapVars(); +- ArrayRef mapBlockArgs = +- cast(opInst).getMapBlockArgs(); ++ ArrayRef mapBlockArgs = blockIface.getMapBlockArgs(); + llvm::Function *llvmOutlinedFn = nullptr; + + // TODO: It can also be false if a compile-time constant `false` IF clause is +@@ -3871,7 +4424,7 @@ + OperandRange privateVars = targetOp.getPrivateVars(); + std::optional privateSyms = targetOp.getPrivateSyms(); + MutableArrayRef privateBlockArgs = +- cast(opInst).getPrivateBlockArgs(); ++ blockIface.getPrivateBlockArgs(); + + for (auto [privVar, privatizerNameAttr, privBlockArg] : + llvm::zip_equal(privateVars, *privateSyms, privateBlockArgs)) { +@@ -3905,6 +4458,7 @@ + return exitBlock.takeError(); + + builder.SetInsertPoint(*exitBlock); + return builder.saveIP(); }; -@@ -3038,9 +3426,6 @@ +@@ -3916,9 +4470,6 @@ if (!getTargetEntryUniqueInfo(entryInfo, targetOp, parentName)) return failure(); @@ -13820,49 +13823,57 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O llvm::OpenMPIRBuilder::InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation); -@@ -3060,14 +3445,12 @@ - auto argAccessorCB = [&](llvm::Argument &arg, llvm::Value *input, - llvm::Value *&retVal, InsertPointTy allocaIP, - InsertPointTy codeGenIP) { -- 
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); -- - // We just return the unaltered argument for the host function - // for now, some alterations may be required in the future to - // keep host fallback functions working identically to the device - // version (e.g. pass ByCopy values should be treated as such on - // host and device, currently not always the case) -- if (!ompBuilder->Config.isTargetDevice()) { -+ if (!isTargetDevice) { - retVal = cast(&arg); - return codeGenIP; - } -@@ -3089,13 +3472,21 @@ - kernelInput.push_back(mapData.OriginalValue[i]); - } +@@ -3954,6 +4505,29 @@ + }; -- builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTarget( -- ompLoc, allocaIP, builder.saveIP(), entryInfo, defaultValTeams, -- defaultValThreads, kernelInput, genMapInfoCB, bodyCB, argAccessorCB)); + llvm::SmallVector kernelInput; + llvm::OpenMPIRBuilder::TargetKernelDefaultBounds defaultBounds; + initTargetDefaultBounds(targetOp, defaultBounds, isTargetDevice, isGPU); + -+ if (Value targetThreadLimit = targetOp.getThreadLimit()) -+ runtimeBounds.TargetThreadLimit = -+ moduleTranslation.lookupValue(targetThreadLimit); -+ -+ builder.restoreIP(ompBuilder->createTarget( -+ ompLoc, targetOp.isTargetSPMDLoop(), allocaIP, builder.saveIP(), -+ entryInfo, defaultBounds, runtimeBounds, kernelInput, genMapInfoCB, -+ bodyCB, argAccessorCB)); - - // Remap access operations to declare target reference pointers for the - // device, essentially generating extra loadop's as necessary -- if (moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice()) -+ if (isTargetDevice) - handleDeclareTargetMapVar(mapData, moduleTranslation, builder); - - return bodyGenStatus; -@@ -3191,24 +3582,45 @@ ++ // Collect host-evaluated values needed to properly launch the kernel from the ++ // host. ++ llvm::OpenMPIRBuilder::TargetKernelRuntimeBounds runtimeBounds; ++ if (!isTargetDevice) ++ initTargetRuntimeBounds(builder, moduleTranslation, targetOp, ++ runtimeBounds); ++ ++ // Pass host-evaluated values as parameters to the kernel / host fallback, ++ // except if they are constants. In any case, map the MLIR block argument to ++ // the corresponding LLVM values. 
++ SmallVector hostEvalVars = targetOp.getHostEvalVars(); ++ ArrayRef hostEvalBlockArgs = blockIface.getHostEvalBlockArgs(); ++ for (auto [arg, var] : llvm::zip_equal(hostEvalBlockArgs, hostEvalVars)) { ++ llvm::Value *value = moduleTranslation.lookupValue(var); ++ moduleTranslation.mapValue(arg, value); ++ ++ if (!llvm::isa(value)) ++ kernelInput.push_back(value); ++ } ++ + for (size_t i = 0; i < mapVars.size(); ++i) { + // declare target arguments are not passed to kernels as arguments + // TODO: We currently do not handle cases where a member is explicitly +@@ -3969,11 +4543,16 @@ + buildDependData(targetOp.getDependKinds(), targetOp.getDependVars(), + moduleTranslation, dds); + ++ llvm::Value *ifCond = nullptr; ++ if (Value targetIfCond = targetOp.getIfExpr()) ++ ifCond = moduleTranslation.lookupValue(targetIfCond); ++ + llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = + moduleTranslation.getOpenMPBuilder()->createTarget( +- ompLoc, isOffloadEntry, allocaIP, builder.saveIP(), entryInfo, +- defaultValTeams, defaultValThreads, kernelInput, genMapInfoCB, bodyCB, +- argAccessorCB, dds, targetOp.getNowait()); ++ ompLoc, targetOp.isTargetSPMDLoop(), isOffloadEntry, ifCond, allocaIP, ++ builder.saveIP(), entryInfo, defaultBounds, runtimeBounds, ++ kernelInput, genMapInfoCB, bodyCB, argAccessorCB, dds, ++ targetOp.getNowait()); + + if (failed(handleError(afterIP, opInst))) + return failure(); +@@ -4079,25 +4658,6 @@ return success(); } @@ -13872,10 +13883,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O - // Assumes no reverse offloading - if (op->getParentOfType()) - return true; -+/////////////////////////////////////////////////////////////////////////////// -+// CompoundConstructs lowering forward declarations -+class OpenMPDialectLLVMIRTranslationInterface; - +- - if (auto parentFn = op->getParentOfType()) - if (auto declareTargetIface = - llvm::dyn_cast( @@ -13884,47 +13892,14 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O - declareTargetIface.getDeclareTargetDeviceType() != - mlir::omp::DeclareTargetDeviceType::host) - return true; -+using ConvertFunctionTy = std::function( -+ Operation *, llvm::IRBuilderBase &, LLVM::ModuleTranslation &)>; - +- - return false; -} -+class ConversionDispatchList { -+private: -+ llvm::SmallVector functions; -+ -+public: -+ std::pair -+ convertOperation(Operation *op, llvm::IRBuilderBase &builder, -+ LLVM::ModuleTranslation &moduleTranslation) { -+ for (auto riter = functions.rbegin(); riter != functions.rend(); ++riter) { -+ bool match = false; -+ LogicalResult result = failure(); -+ std::tie(match, result) = (*riter)(op, builder, moduleTranslation); -+ if (match) -+ return {true, result}; -+ } -+ return {false, failure()}; -+ } -+ -+ void pushConversionFunction(ConvertFunctionTy function) { -+ functions.push_back(function); -+ } -+ void popConversionFunction() { functions.pop_back(); } -+}; -+ -+static LogicalResult convertOmpDistributeParallelWsloop( -+ Operation *op, omp::DistributeOp distribute, omp::ParallelOp parallel, -+ omp::WsloopOp wsloop, llvm::IRBuilderBase &builder, -+ LLVM::ModuleTranslation &moduleTranslation, -+ ConversionDispatchList &dispatchList); -+ -+/////////////////////////////////////////////////////////////////////////////// -+// Dispatch functions - +- /// Given an OpenMP MLIR operation, create the corresponding LLVM IR /// (including OpenMP runtime calls). 
-@@ -3313,6 +3725,9 @@ + static LogicalResult +@@ -4214,6 +4774,9 @@ .Case([&](omp::TargetOp) { return convertOmpTarget(*op, builder, moduleTranslation); }) @@ -13934,7 +13909,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O .Case( [&](auto op) { // No-op, should be handled by relevant owning operations e.g. -@@ -3326,9 +3741,101 @@ +@@ -4226,6 +4789,38 @@ }); } @@ -13957,55 +13932,6 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + return false; +} + -+// Returns true if the given block has a single instruction. -+static bool singleInstrBlock(Block &block) { -+ bool result = (block.getOperations().size() == 2); -+ if (!result) { -+ llvm::errs() << "Num ops: " << block.getOperations().size() << "\n"; -+ } -+ return result; -+} -+ -+// Returns the operation if it only contains one instruction otherwise -+// return nullptr. -+template -+Operation *getContainedInstr(OpType op) { -+ Region ®ion = op.getRegion(); -+ if (!region.hasOneBlock()) { -+ llvm::errs() << "Region has multiple blocks\n"; -+ return nullptr; -+ } -+ Block &block = region.front(); -+ if (!singleInstrBlock(block)) { -+ return nullptr; -+ } -+ return &(block.getOperations().front()); -+} -+ -+// Returns the operation if it only contains one instruction otherwise -+// return nullptr. -+template -+Block &getContainedBlock(OpType op) { -+ Region ®ion = op.getRegion(); -+ return region.front(); -+} -+ -+template -+bool matchOpScanNest(Block &block, FirstOpType &firstOp, -+ RestOpTypes &...restOps) { -+ for (Operation &op : block) { -+ if ((firstOp = mlir::dyn_cast(op))) { -+ if constexpr (sizeof...(RestOpTypes) == 0) { -+ return true; -+ } else { -+ Block &innerBlock = getContainedBlock(firstOp); -+ return matchOpScanNest(innerBlock, restOps...); -+ } -+ } -+ } -+ return false; -+} -+ +template +bool matchOpNest(Operation *op, FirstOpType &firstOp, RestOpTypes &...restOps) { + if ((firstOp = mlir::dyn_cast(op))) { @@ -14021,87 +13947,18 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + static LogicalResult convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder, -- LLVM::ModuleTranslation &moduleTranslation) { -+ LLVM::ModuleTranslation &moduleTranslation, -+ ConversionDispatchList &dispatchList) { -+ omp::DistributeOp distribute; -+ omp::ParallelOp parallel; -+ omp::WsloopOp wsloop; -+ // Match composite constructs -+ if (matchOpNest(op, distribute, parallel, wsloop)) { -+ return convertOmpDistributeParallelWsloop(op, distribute, parallel, wsloop, -+ builder, moduleTranslation, -+ dispatchList); -+ } -+ - return convertHostOrTargetOperation(op, builder, moduleTranslation); - } - -@@ -3356,12 +3863,62 @@ + LLVM::ModuleTranslation &moduleTranslation) { +@@ -4256,7 +4851,8 @@ return failure(interrupted); } -namespace { +/////////////////////////////////////////////////////////////////////////////// -+// CompoundConstructs lowering implementations -+ -+// Implementation converting a nest of operations in a single function. This -+// just overrides the parallel and wsloop dispatches but does the normal -+// lowering for now. 
-+static LogicalResult convertOmpDistributeParallelWsloop( -+ Operation *op, omp::DistributeOp distribute, omp::ParallelOp parallel, -+ omp::WsloopOp wsloop, llvm::IRBuilderBase &builder, -+ LLVM::ModuleTranslation &moduleTranslation, -+ ConversionDispatchList &dispatchList) { -+ -+ // Reduction related data structures -+ SmallVector owningReductionGens; -+ SmallVector owningAtomicReductionGens; -+ SmallVector reductionInfos; -+ llvm::OpenMPIRBuilder::InsertPointTy redAllocaIP; -+ -+ // Convert wsloop alternative implementation -+ ConvertFunctionTy convertWsloop = -+ [&redAllocaIP, &owningReductionGens, &owningAtomicReductionGens, -+ &reductionInfos](Operation *op, llvm::IRBuilderBase &builder, -+ LLVM::ModuleTranslation &moduleTranslation) { -+ if (!isa(op)) { -+ return std::make_pair(false, failure()); -+ } -+ -+ LogicalResult result = convertOmpWsloop( -+ *op, builder, moduleTranslation, redAllocaIP, owningReductionGens, -+ owningAtomicReductionGens, reductionInfos); -+ return std::make_pair(true, result); -+ }; -+ -+ // Push the new alternative functions -+ dispatchList.pushConversionFunction(convertWsloop); -+ -+ // Lower the current distribute operation -+ LogicalResult result = convertOmpDistribute(*op, builder, moduleTranslation, -+ &redAllocaIP, reductionInfos); -+ -+ // Pop the alternative functions -+ dispatchList.popConversionFunction(); -+ -+ return result; -+} -+ -+/////////////////////////////////////////////////////////////////////////////// +// OpenMPDialectLLVMIRTranslationInterface /// Implementation of the dialect interface that converts operations belonging /// to the OpenMP dialect to LLVM IR. - class OpenMPDialectLLVMIRTranslationInterface - : public LLVMTranslationDialectInterface { -+private: -+ mutable ConversionDispatchList dispatchList; -+ - public: - using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface; - -@@ -3371,16 +3928,14 @@ +@@ -4271,16 +4867,14 @@ convertOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) const final; @@ -14120,369 +13977,193 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation( Operation *op, ArrayRef instructions, NamedAttribute attribute, -@@ -3475,13 +4030,21 @@ - Operation *op, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation) const { - -+ // Check to see if there is a lowering that overrides the default lowering -+ // if not use the default dispatch. 
-+ bool match = false; -+ LogicalResult result = success(); -+ std::tie(match, result) = -+ dispatchList.convertOperation(op, builder, moduleTranslation); -+ if (match) -+ return result; -+ - llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); - if (ompBuilder->Config.isTargetDevice()) { -- if (isTargetDeviceOp(op)) { -- return convertTargetDeviceOp(op, builder, moduleTranslation); -- } else { -- return convertTargetOpsInNest(op, builder, moduleTranslation); -- } -+ if (isTargetDeviceOp(op)) -+ return convertTargetDeviceOp(op, builder, moduleTranslation, -+ dispatchList); -+ return convertTargetOpsInNest(op, builder, moduleTranslation); - } - return convertHostOrTargetOperation(op, builder, moduleTranslation); - } -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir llvm-project/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir ---- llvm-project.orig/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir 2024-06-12 10:43:15.340181898 -0500 -+++ llvm-project/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir 2024-06-12 10:44:09.359614154 -0500 -@@ -174,6 +174,7 @@ - ^bb3: - omp.yield - } -+ omp.terminator - } - return - } -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Dialect/OpenMP/invalid.mlir llvm-project/mlir/test/Dialect/OpenMP/invalid.mlir ---- llvm-project.orig/mlir/test/Dialect/OpenMP/invalid.mlir 2024-06-12 10:43:15.360181687 -0500 -+++ llvm-project/mlir/test/Dialect/OpenMP/invalid.mlir 2024-06-12 10:44:09.359614154 -0500 -@@ -11,8 +11,8 @@ - // ----- - - func.func @not_wrapper() { -+ // expected-error@+1 {{op must be a loop wrapper}} - omp.distribute { -- // expected-error@+1 {{op must take a loop wrapper role if nested inside of 'omp.distribute'}} - omp.parallel { - %0 = arith.constant 0 : i32 - omp.terminator -@@ -363,12 +363,16 @@ - - // ----- - --func.func @omp_simd_nested_wrapper() -> () { -+func.func @omp_simd_nested_wrapper(%lb : index, %ub : index, %step : index) -> () { - // expected-error @below {{op must wrap an 'omp.loop_nest' directly}} - omp.simd { - omp.distribute { -+ omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) { -+ omp.yield -+ } - omp.terminator - } -+ omp.terminator - } - return - } -@@ -1359,24 +1363,18 @@ - // ----- - - func.func @omp_teams_num_teams1(%lb : i32) { -- omp.target { -- // expected-error @below {{expected num_teams upper bound to be defined if the lower bound is defined}} -- "omp.teams" (%lb) ({ -- omp.terminator -- }) {operandSegmentSizes = array} : (i32) -> () -+ // expected-error @below {{expected num_teams upper bound to be defined if the lower bound is defined}} -+ "omp.teams" (%lb) ({ - omp.terminator -- } -+ }) {operandSegmentSizes = array} : (i32) -> () - return - } - - // ----- - - func.func @omp_teams_num_teams2(%lb : i32, %ub : i16) { -- omp.target { -- // expected-error @below {{expected num_teams upper bound and lower bound to be the same type}} -- omp.teams num_teams(%lb : i32 to %ub : i16) { -- omp.terminator -- } -+ // expected-error @below {{expected num_teams upper bound and lower bound to be the same type}} -+ omp.teams num_teams(%lb : i32 to %ub : i16) { - omp.terminator - } - return -@@ -1920,6 +1918,7 @@ - } - omp.terminator - } -+ omp.terminator - } - return - } -@@ -2084,7 +2083,7 @@ - // expected-error @below {{op expected as many depend values as depend variables}} - "omp.target"(%data_var) ({ - "omp.terminator"() : () -> () -- }) {depends = [], operandSegmentSizes = array} : (memref) -> () -+ }) {depends = 
[], operandSegmentSizes = array} : (memref) -> () - "func.return"() : () -> () - } - -@@ -2118,11 +2117,13 @@ - - // ----- - --func.func @omp_distribute_nested_wrapper(%data_var : memref) -> () { -+func.func @omp_distribute_nested_wrapper(%lb: index, %ub: index, %step: index) -> () { - // expected-error @below {{only supported nested wrappers are 'omp.parallel' and 'omp.simd'}} - omp.distribute { - "omp.wsloop"() ({ -- %0 = arith.constant 0 : i32 -+ omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) { -+ "omp.yield"() : () -> () -+ } - "omp.terminator"() : () -> () - }) : () -> () - "omp.terminator"() : () -> () -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Dialect/OpenMP/ops.mlir llvm-project/mlir/test/Dialect/OpenMP/ops.mlir ---- llvm-project.orig/mlir/test/Dialect/OpenMP/ops.mlir 2024-06-12 10:43:15.360181687 -0500 -+++ llvm-project/mlir/test/Dialect/OpenMP/ops.mlir 2024-06-12 10:44:09.359614154 -0500 -@@ -601,6 +601,7 @@ - omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) { - omp.yield - } -+ omp.terminator - } - return - } -@@ -616,6 +617,7 @@ - omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) { - omp.yield - } -+ omp.terminator - } - return - } -@@ -627,6 +629,7 @@ - omp.loop_nest (%iv): index = (%lb) to (%ub) step (%step) { - omp.yield - } -+ omp.terminator - } - return - } -@@ -640,6 +643,7 @@ - omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) { - omp.yield - } -+ omp.terminator - } - return - } -@@ -651,6 +655,7 @@ - omp.loop_nest (%iv): index = (%lb) to (%ub) step (%step) { - omp.yield - } -+ omp.terminator - } - return - } -@@ -662,6 +667,7 @@ - omp.loop_nest (%iv): index = (%lb) to (%ub) step (%step) { - omp.yield - } -+ omp.terminator - } - return - } -@@ -673,6 +679,7 @@ - omp.loop_nest (%iv): index = (%lb) to (%ub) step (%step) { - omp.yield - } -+ omp.terminator - } - return - } -@@ -692,30 +699,35 @@ - omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { - omp.yield - } -+ omp.terminator - } - // CHECK: omp.distribute dist_schedule_static - omp.distribute dist_schedule_static { - omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { - omp.yield - } -+ omp.terminator - } - // CHECK: omp.distribute dist_schedule_static chunk_size(%{{.+}} : i32) - omp.distribute dist_schedule_static chunk_size(%chunk_size : i32) { - omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { - omp.yield - } -+ omp.terminator - } - // CHECK: omp.distribute order(concurrent) - omp.distribute order(concurrent) { - omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { - omp.yield - } -+ omp.terminator - } - // CHECK: omp.distribute allocate(%{{.+}} : memref -> %{{.+}} : memref) - omp.distribute allocate(%data_var : memref -> %data_var : memref) { - omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { - omp.yield - } -+ omp.terminator - } - // CHECK: omp.distribute - omp.distribute { -@@ -723,7 +735,9 @@ - omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { - omp.yield - } -+ omp.terminator - } -+ omp.terminator - } - return - } -@@ -737,7 +751,7 @@ - "omp.target"(%if_cond, %device, %num_threads) ({ - // CHECK: omp.terminator - omp.terminator -- }) {nowait, operandSegmentSizes = array} : ( i1, si32, i32 ) -> () -+ }) {nowait, operandSegmentSizes = array} : ( i1, si32, i32 ) -> () +@@ -4394,11 +4988,10 @@ - // Test with optional map clause. 
- // CHECK: %[[MAP_A:.*]] = omp.map.info var_ptr(%[[VAL_1:.*]] : memref, tensor) map_clauses(tofrom) capture(ByRef) -> memref {name = ""} -@@ -2201,6 +2215,7 @@ - // CHECK: omp.yield - omp.yield - } -+ omp.terminator + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + if (ompBuilder->Config.isTargetDevice()) { +- if (isTargetDeviceOp(op)) { ++ if (isTargetDeviceOp(op)) + return convertTargetDeviceOp(op, builder, moduleTranslation); +- } else { ++ else + return convertTargetOpsInNest(op, builder, moduleTranslation); +- } } + return convertHostOrTargetOperation(op, builder, moduleTranslation); + } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Dialect/OpenMP/invalid.mlir llvm-project-aso/mlir/test/Dialect/OpenMP/invalid.mlir +--- llvm-project-aso-orig/mlir/test/Dialect/OpenMP/invalid.mlir 2024-11-23 20:25:27.503272791 -0600 ++++ llvm-project-aso/mlir/test/Dialect/OpenMP/invalid.mlir 2024-11-23 20:39:47.200175294 -0600 +@@ -1391,24 +1391,18 @@ + // ----- - %testbool = "test.bool"() : () -> (i1) -@@ -2211,6 +2226,7 @@ - // CHECK: omp.yield - omp.yield - } -+ omp.terminator - } + func.func @omp_teams_num_teams1(%lb : i32) { +- omp.target { +- // expected-error @below {{expected num_teams upper bound to be defined if the lower bound is defined}} +- "omp.teams" (%lb) ({ +- omp.terminator +- }) {operandSegmentSizes = array} : (i32) -> () ++ // expected-error @below {{expected num_teams upper bound to be defined if the lower bound is defined}} ++ "omp.teams" (%lb) ({ + omp.terminator +- } ++ }) {operandSegmentSizes = array} : (i32) -> () + return + } - // CHECK: omp.taskloop final(%{{[^)]+}}) { -@@ -2219,6 +2235,7 @@ - // CHECK: omp.yield - omp.yield - } -+ omp.terminator - } + // ----- - // CHECK: omp.taskloop untied { -@@ -2227,6 +2244,7 @@ - // CHECK: omp.yield - omp.yield - } -+ omp.terminator + func.func @omp_teams_num_teams2(%lb : i32, %ub : i16) { +- omp.target { +- // expected-error @below {{expected num_teams upper bound and lower bound to be the same type}} +- omp.teams num_teams(%lb : i32 to %ub : i16) { +- omp.terminator +- } ++ // expected-error @below {{expected num_teams upper bound and lower bound to be the same type}} ++ omp.teams num_teams(%lb : i32 to %ub : i16) { + omp.terminator } + return +@@ -2138,11 +2132,80 @@ - // CHECK: omp.taskloop mergeable { -@@ -2235,6 +2253,7 @@ - // CHECK: omp.yield - omp.yield - } -+ omp.terminator - } + // ----- - %testf32 = "test.f32"() : () -> (!llvm.ptr) -@@ -2245,6 +2264,7 @@ - // CHECK: omp.yield - omp.yield - } ++func.func @omp_target_multiple_teams() { ++ // expected-error @below {{target containing multiple teams constructs}} ++ omp.target { ++ omp.teams { ++ omp.terminator ++ } ++ omp.teams { ++ omp.terminator ++ } + omp.terminator - } - - // CHECK: omp.taskloop reduction(@add_f32 -> %{{.+}} : !llvm.ptr, @add_f32 -> %{{.+}} : !llvm.ptr) { -@@ -2253,6 +2273,7 @@ - // CHECK: omp.yield - omp.yield - } ++ } ++ return ++} ++ ++// ----- ++ ++func.func @omp_target_host_eval1(%x : !llvm.ptr) { ++ // expected-error @below {{op host_eval argument illegal use in 'llvm.load' operation}} ++ omp.target host_eval(%x -> %arg0 : !llvm.ptr) { ++ %0 = llvm.load %arg0 : !llvm.ptr -> f32 + omp.terminator - } - - // CHECK: omp.taskloop in_reduction(@add_f32 -> %{{.+}} : !llvm.ptr) reduction(@add_f32 -> %{{.+}} : !llvm.ptr) { -@@ -2261,6 +2282,7 @@ - // CHECK: omp.yield - omp.yield - } ++ } ++ return ++} ++ ++// ----- ++ ++func.func @omp_target_host_eval2(%x : i1) { ++ // expected-error @below {{op host_eval argument 
only legal as 'num_teams' and 'thread_limit' in 'omp.teams'}} ++ omp.target host_eval(%x -> %arg0 : i1) { ++ omp.teams if(%arg0) { ++ omp.terminator ++ } + omp.terminator - } - - %testi32 = "test.i32"() : () -> (i32) -@@ -2270,6 +2292,7 @@ - // CHECK: omp.yield - omp.yield - } ++ } ++ return ++} ++ ++// ----- ++ ++func.func @omp_target_host_eval3(%x : i32) { ++ // expected-error @below {{op host_eval argument only legal as 'num_threads' in 'omp.parallel' when representing target SPMD}} ++ omp.target host_eval(%x -> %arg0 : i32) { ++ omp.parallel num_threads(%arg0 : i32) { ++ omp.terminator ++ } + omp.terminator - } - - %testmemref = "test.memref"() : () -> (memref) -@@ -2279,6 +2302,7 @@ - // CHECK: omp.yield - omp.yield - } ++ } ++ return ++} ++ ++// ----- ++ ++func.func @omp_target_host_eval3(%x : i32) { ++ // expected-error @below {{op host_eval argument only legal as loop bounds and steps in 'omp.loop_nest' when representing target SPMD}} ++ omp.target host_eval(%x -> %arg0 : i32) { ++ omp.wsloop { ++ omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { ++ omp.yield ++ } ++ omp.terminator ++ } + omp.terminator - } ++ } ++ return ++} ++ ++// ----- ++ + func.func @omp_target_depend(%data_var: memref) { + // expected-error @below {{op expected as many depend values as depend variables}} + "omp.target"(%data_var) ({ + "omp.terminator"() : () -> () +- }) {depend_kinds = [], operandSegmentSizes = array} : (memref) -> () ++ }) {depend_kinds = [], operandSegmentSizes = array} : (memref) -> () + "func.return"() : () -> () + } - %testi64 = "test.i64"() : () -> (i64) -@@ -2288,6 +2312,7 @@ - // CHECK: omp.yield - omp.yield - } -+ omp.terminator - } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Dialect/OpenMP/ops.mlir llvm-project-aso/mlir/test/Dialect/OpenMP/ops.mlir +--- llvm-project-aso-orig/mlir/test/Dialect/OpenMP/ops.mlir 2024-11-23 20:25:27.503272791 -0600 ++++ llvm-project-aso/mlir/test/Dialect/OpenMP/ops.mlir 2024-11-23 20:39:47.200175294 -0600 +@@ -770,7 +770,7 @@ + "omp.target"(%device, %if_cond, %num_threads) ({ + // CHECK: omp.terminator + omp.terminator +- }) {nowait, operandSegmentSizes = array} : ( si32, i1, i32 ) -> () ++ }) {nowait, operandSegmentSizes = array} : ( si32, i1, i32 ) -> () - // CHECK: omp.taskloop num_tasks(%{{[^:]+}}: i64) { -@@ -2296,6 +2321,7 @@ - // CHECK: omp.yield - omp.yield - } -+ omp.terminator - } + // Test with optional map clause. 
+ // CHECK: %[[MAP_A:.*]] = omp.map.info var_ptr(%[[VAL_1:.*]] : memref, tensor) map_clauses(tofrom) capture(ByRef) -> memref {name = ""} +@@ -2750,6 +2750,42 @@ + return + } - // CHECK: omp.taskloop nogroup { -@@ -2304,6 +2330,7 @@ - // CHECK: omp.yield - omp.yield - } ++func.func @omp_target_host_eval(%x : i32) { ++ // CHECK: omp.target host_eval(%{{.*}} -> %[[HOST_ARG:.*]] : i32) { ++ // CHECK: omp.teams num_teams( to %[[HOST_ARG]] : i32) ++ // CHECK-SAME: thread_limit(%[[HOST_ARG]] : i32) ++ omp.target host_eval(%x -> %arg0 : i32) { ++ omp.teams num_teams(to %arg0 : i32) thread_limit(%arg0 : i32) { ++ omp.terminator ++ } + omp.terminator - } - - // CHECK: omp.taskloop { -@@ -2313,7 +2340,9 @@ - // CHECK: omp.yield - omp.yield - } ++ } ++ ++ // CHECK: omp.target host_eval(%{{.*}} -> %[[HOST_ARG:.*]] : i32) { ++ // CHECK: omp.teams ++ // CHECK: omp.parallel num_threads(%[[HOST_ARG]] : i32) { ++ // CHECK: omp.distribute { ++ // CHECK: omp.wsloop { ++ // CHECK: omp.loop_nest (%{{.*}}) : i32 = (%[[HOST_ARG]]) to (%[[HOST_ARG]]) step (%[[HOST_ARG]]) { ++ omp.target host_eval(%x -> %arg0 : i32) { ++ omp.teams { ++ omp.parallel num_threads(%arg0 : i32) { ++ omp.distribute { ++ omp.wsloop { ++ omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { ++ omp.yield ++ } ++ } {omp.composite} ++ } {omp.composite} ++ omp.terminator ++ } {omp.composite} + omp.terminator - } ++ } + omp.terminator - } - - // CHECK: return -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir llvm-project/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir ---- llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir 2024-06-12 10:43:15.400181266 -0500 -+++ llvm-project/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir 2024-06-12 10:44:09.359614154 -0500 ++ } ++ return ++} ++ + // CHECK-LABEL: omp_loop + func.func @omp_loop(%lb : index, %ub : index, %step : index) { + // CHECK: omp.loop { +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir 2024-10-18 17:40:33.932977650 -0500 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir 2024-11-23 20:39:47.200175294 -0600 @@ -1,6 +1,6 @@ // RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s @@ -14491,7 +14172,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge llvm.func @_QQmain() attributes {fir.bindc_name = "main"} { %0 = llvm.mlir.addressof @_QFEi : !llvm.ptr %1 = llvm.mlir.addressof @_QFEsp : !llvm.ptr -@@ -24,7 +24,7 @@ +@@ -23,7 +23,7 @@ } } @@ -14500,9 +14181,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge // CHECK: entry: // CHECK: %[[ALLOCA_BYREF:.*]] = alloca ptr, align 8 -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir llvm-project/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir ---- llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir 2024-06-12 10:43:15.400181266 -0500 -+++ llvm-project/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir 2024-06-12 10:44:09.359614154 -0500 +diff -Naur -x .git -x '*.pyc' 
llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir 2024-10-18 17:40:33.932977650 -0500 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir 2024-11-23 20:39:47.200175294 -0600 @@ -10,7 +10,7 @@ // constant sized) allocations performs its task reasonably in these // scenarios. @@ -14512,7 +14193,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge llvm.func @_QQmain() attributes {omp.declare_target = #omp.declaretarget} { %1 = llvm.mlir.constant(1 : i64) : i64 %2 = llvm.alloca %1 x !llvm.struct<(ptr)> : (i64) -> !llvm.ptr -@@ -34,7 +34,7 @@ +@@ -33,7 +33,7 @@ llvm.func @_ExternalCall(!llvm.ptr, !llvm.ptr) -> !llvm.struct<()> } @@ -14521,9 +14202,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge // CHECK-NEXT: entry: // CHECK-NEXT: %[[MOVED_ALLOCA1:.*]] = alloca { ptr }, align 8 // CHECK-NEXT: %[[MOVED_ALLOCA2:.*]] = alloca i32, i64 1, align 4 -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir llvm-project/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir ---- llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir 2024-06-12 10:43:15.400181266 -0500 -+++ llvm-project/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir 2024-06-12 10:44:09.359614154 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir 2024-10-18 17:40:33.936977609 -0500 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir 2024-11-23 20:39:47.200175294 -0600 @@ -1,6 +1,6 @@ // RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s @@ -14532,7 +14213,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge llvm.func @_QQmain() attributes {bindc_name = "main"} { %0 = llvm.mlir.addressof @_QFEsp : !llvm.ptr %1 = llvm.mlir.constant(10 : index) : i64 -@@ -31,7 +31,7 @@ +@@ -30,7 +30,7 @@ } @@ -14541,9 +14222,20 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge // CHECK: %[[ARG1_ALLOCA:.*]] = alloca ptr, align 8 // CHECK: store ptr %[[ARG1]], ptr %[[ARG1_ALLOCA]], align 8 -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir llvm-project/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir ---- llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir 2024-06-12 10:43:15.400181266 -0500 -+++ llvm-project/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir 2024-06-12 10:44:09.359614154 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-debug.mlir llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-debug.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-debug.mlir 2024-10-18 17:40:33.936977609 -0500 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-debug.mlir 2024-11-23 20:39:47.200175294 -0600 +@@ -1,6 +1,6 @@ + // RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + 
+-module attributes {omp.is_target_device = true} { ++module attributes {omp.is_target_device = true, llvm.target_triple = "amdgcn-amd-amdhsa"} { + llvm.func @_QQmain() { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir 2024-10-18 17:40:33.936977609 -0500 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir 2024-11-23 20:39:47.200175294 -0600 @@ -7,7 +7,7 @@ // Unfortunately, only so much can be tested as the device side is dependent on a *.bc // file created by the host and appended as an attribute to the module. @@ -14553,10 +14245,393 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge // CHECK-DAG: @_QMtest_0Esp_decl_tgt_ref_ptr = weak global ptr null, align 8 llvm.mlir.global external @_QMtest_0Esp() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget} : i32 { %0 = llvm.mlir.constant(0 : i32) : i32 -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir llvm-project/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir ---- llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir 2024-06-12 10:43:15.400181266 -0500 -+++ llvm-project/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir 2024-06-12 10:44:09.359614154 -0500 -@@ -55,7 +55,7 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-record-type-mapping-host.mlir llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-record-type-mapping-host.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-record-type-mapping-host.mlir 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-record-type-mapping-host.mlir 2024-11-23 20:39:47.204175279 -0600 +@@ -0,0 +1,329 @@ ++// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s ++ ++// This test checks the offload sizes, map types and base pointers and pointers ++// provided to the OpenMP kernel argument structure are correct when lowering ++// to LLVM-IR from MLIR when performing explicit member mapping of a record type ++// that includes fortran allocatables in various locations of the record types ++// hierarchy. 
++ ++module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} { ++ llvm.func @omp_map_derived_type_allocatable_member(%arg0: !llvm.ptr) { ++ %0 = llvm.mlir.constant(4 : index) : i64 ++ %1 = llvm.mlir.constant(1 : index) : i64 ++ %2 = llvm.mlir.constant(0 : index) : i64 ++ %3 = omp.map.bounds lower_bound(%2 : i64) upper_bound(%0 : i64) extent(%0 : i64) stride(%1 : i64) start_idx(%2 : i64) {stride_in_bytes = true} ++ %4 = llvm.getelementptr %arg0[0, 4] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_derived_type_allocatable_map_operand_and_block_additionTone_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)> ++ %5 = llvm.getelementptr %4[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> ++ %6 = omp.map.info var_ptr(%4 : !llvm.ptr, i32) var_ptr_ptr(%5 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%3) -> !llvm.ptr {name = ""} ++ %7 = omp.map.info var_ptr(%4 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "one_l%array_j"} ++ %8 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<"_QFtest_derived_type_allocatable_map_operand_and_block_additionTone_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>) map_clauses(tofrom) capture(ByRef) members(%7, %6 : [4,-1], [4,0] : !llvm.ptr, !llvm.ptr) -> !llvm.ptr {name = "one_l", partial_map = true} ++ omp.target map_entries(%7 -> %arg1, %6 -> %arg2, %8 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { ++ omp.terminator ++ } ++ llvm.return ++ } ++ ++ llvm.func @omp_allocatable_derived_type_member_map(%arg0: !llvm.ptr) { ++ %0 = llvm.mlir.constant(1 : i32) : i32 ++ %1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr ++ %2 = llvm.mlir.constant(1 : i32) : i32 ++ %3 = llvm.alloca %2 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr ++ %4 = llvm.mlir.constant(5 : index) : i64 ++ %5 = llvm.mlir.constant(4 : index) : i64 ++ %6 = llvm.mlir.constant(1 : index) : i64 ++ %7 = llvm.mlir.constant(0 : index) : i64 ++ %8 = omp.map.bounds lower_bound(%7 : i64) upper_bound(%5 : i64) extent(%5 : i64) stride(%6 : i64) start_idx(%7 : i64) {stride_in_bytes = true} ++ %9 = llvm.load %arg0 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> ++ llvm.store %9, %3 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>, !llvm.ptr ++ %10 = llvm.getelementptr %3[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> ++ %11 = llvm.load %10 : !llvm.ptr -> !llvm.ptr ++ %12 = llvm.getelementptr %11[0, 4] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_allocatable_derived_type_map_operand_and_block_additionTone_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)> ++ %13 = llvm.getelementptr %12[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> ++ %14 = omp.map.info var_ptr(%12 : !llvm.ptr, i32) var_ptr_ptr(%13 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%8) -> !llvm.ptr {name = ""} ++ %15 = omp.map.info 
var_ptr(%12 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "one_l%array_j"} ++ %16 = llvm.load %arg0 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> ++ llvm.store %16, %1 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>, !llvm.ptr ++ %17 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> ++ %18 = llvm.load %17 : !llvm.ptr -> !llvm.ptr ++ %19 = llvm.getelementptr %18[0, 5] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_allocatable_derived_type_map_operand_and_block_additionTone_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)> ++ %20 = omp.map.info var_ptr(%19 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "one_l%k"} ++ %21 = llvm.getelementptr %arg0[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> ++ %22 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<"_QFtest_allocatable_derived_type_map_operand_and_block_additionTone_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>) var_ptr_ptr(%21 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} ++ %23 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>) map_clauses(tofrom) capture(ByRef) members(%22, %15, %14, %20 : [0,-1,-1], [0,4,-1], [0,4,0], [0,5,-1] : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) -> !llvm.ptr {name = "one_l"} ++ omp.target map_entries(%22 -> %arg1, %15 -> %arg2, %14 -> %arg3, %20 -> %arg4, %23 -> %arg5 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) { ++ omp.terminator ++ } ++ llvm.return ++ } ++ ++ llvm.func @omp_alloca_nested_derived_type_map(%arg0: !llvm.ptr) { ++ %0 = llvm.mlir.constant(1 : i32) : i32 ++ %1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr ++ %2 = llvm.mlir.constant(1 : i32) : i32 ++ %3 = llvm.alloca %2 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr ++ %4 = llvm.mlir.constant(3 : index) : i64 ++ %5 = llvm.mlir.constant(4 : index) : i64 ++ %6 = llvm.mlir.constant(6 : index) : i64 ++ %7 = llvm.mlir.constant(1 : index) : i64 ++ %8 = llvm.mlir.constant(2 : index) : i64 ++ %9 = llvm.mlir.constant(0 : index) : i64 ++ %10 = omp.map.bounds lower_bound(%9 : i64) upper_bound(%5 : i64) extent(%5 : i64) stride(%7 : i64) start_idx(%9 : i64) {stride_in_bytes = true} ++ %11 = llvm.load %arg0 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> ++ llvm.store %11, %3 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>, !llvm.ptr ++ %12 = llvm.getelementptr %3[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> ++ %13 = llvm.load %12 : !llvm.ptr -> !llvm.ptr ++ %14 = llvm.getelementptr %13[0, 6] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTtop_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32, 
struct<"_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>)> ++ %15 = llvm.getelementptr %14[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)> ++ %16 = llvm.getelementptr %15[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> ++ %17 = omp.map.info var_ptr(%15 : !llvm.ptr, i32) var_ptr_ptr(%16 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%10) -> !llvm.ptr {name = ""} ++ %18 = omp.map.info var_ptr(%15 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "one_l%nest%array_k"} ++ %19 = llvm.load %arg0 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> ++ llvm.store %19, %1 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>, !llvm.ptr ++ %20 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> ++ %21 = llvm.load %20 : !llvm.ptr -> !llvm.ptr ++ %22 = llvm.getelementptr %21[0, 6] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTtop_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32, struct<"_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>)> ++ %23 = llvm.getelementptr %22[0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)> ++ %24 = omp.map.info var_ptr(%23 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "one_l%nest%k"} ++ %25 = llvm.getelementptr %arg0[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> ++ %26 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<"_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTtop_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32, struct<"_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>)>) var_ptr_ptr(%25 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} ++ %27 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>) map_clauses(tofrom) capture(ByRef) members(%26, %18, %17, %24 : [0,-1,-1,-1], [0,6,2,-1], [0,6,2,0], [0,6,3,-1] : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) -> !llvm.ptr {name = "one_l"} ++ omp.target map_entries(%26 -> %arg1, %18 -> %arg2, %17 -> %arg3, %24 -> %arg4, %27 -> %arg5 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) { ++ omp.terminator ++ } ++ llvm.return ++ } ++ ++ llvm.func @omp_nested_derived_type_alloca_map(%arg0: !llvm.ptr) { ++ %0 = llvm.mlir.constant(4 : index) : i64 ++ %1 = llvm.mlir.constant(1 : index) : i64 
++ %2 = llvm.mlir.constant(2 : index) : i64 ++ %3 = llvm.mlir.constant(0 : index) : i64 ++ %4 = llvm.mlir.constant(6 : index) : i64 ++ %5 = omp.map.bounds lower_bound(%3 : i64) upper_bound(%0 : i64) extent(%0 : i64) stride(%1 : i64) start_idx(%3 : i64) {stride_in_bytes = true} ++ %6 = llvm.getelementptr %arg0[0, 6] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTtop_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32, struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>)> ++ %7 = llvm.getelementptr %6[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)> ++ %8 = llvm.getelementptr %7[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> ++ %9 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) var_ptr_ptr(%8 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%5) -> !llvm.ptr {name = ""} ++ %10 = omp.map.info var_ptr(%7 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "one_l%nest%array_k"} ++ %11 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTtop_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32, struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>)>) map_clauses(tofrom) capture(ByRef) members(%10, %9 : [6,2,-1], [6,2,0] : !llvm.ptr, !llvm.ptr) -> !llvm.ptr {name = "one_l", partial_map = true} ++ omp.target map_entries(%10 -> %arg1, %9 -> %arg2, %11 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { ++ omp.terminator ++ } ++ llvm.return ++ } ++} ++ ++// CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [4 x i64] [i64 0, i64 48, i64 8, i64 20] ++// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710659, i64 281474976710659, i64 281474976710675] ++// CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [8 x i64] [i64 0, i64 40, i64 8, i64 136, i64 48, i64 8, i64 20, i64 4] ++// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [8 x i64] [i64 32, i64 281474976710659, i64 281474976710659, i64 281474976710675, i64 281474976710659, i64 281474976710659, i64 281474976710675, i64 281474976710659] ++// CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [8 x i64] [i64 0, i64 40, i64 8, i64 240, i64 48, i64 8, i64 20, i64 4] ++// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [8 x i64] [i64 32, i64 281474976710659, i64 281474976710659, i64 281474976710675, i64 281474976710659, i64 281474976710659, i64 281474976710675, i64 281474976710659] ++// CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [4 x i64] [i64 0, i64 48, i64 8, i64 20] ++// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710659, i64 281474976710659, i64 281474976710675] ++ ++// CHECK: 
define void @omp_map_derived_type_allocatable_member(ptr %[[ARG:.*]]) { ++ ++// CHECK: %[[DTYPE_ALLOCATABLE_MEMBER_GEP:.*]] = getelementptr %_QFtest_derived_type_allocatable_map_operand_and_block_additionTone_layer, ptr %[[ARG]], i32 0, i32 4 ++// CHECK: %[[ALLOCATABLE_MEMBER_BADDR:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[DTYPE_ALLOCATABLE_MEMBER_GEP]], i32 0, i32 0 ++ ++// CHECK: %[[LOAD_ALLOCATABLE_MEMBER_BADDR:.*]] = load ptr, ptr %[[ALLOCATABLE_MEMBER_BADDR]], align 8 ++// CHECK: %[[ARR_OFFSET:.*]] = getelementptr inbounds i32, ptr %[[LOAD_ALLOCATABLE_MEMBER_BADDR]], i64 0 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_1:.*]] = getelementptr i32, ptr %[[ARR_OFFSET]], i64 1 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_2:.*]] = ptrtoint ptr %[[DTYPE_SIZE_SEGMENT_CALC_1]] to i64 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_3:.*]] = ptrtoint ptr %[[DTYPE_ALLOCATABLE_MEMBER_GEP]] to i64 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_4:.*]] = sub i64 %[[DTYPE_SIZE_SEGMENT_CALC_2]], %[[DTYPE_SIZE_SEGMENT_CALC_3]] ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_5:.*]] = sdiv exact i64 %[[DTYPE_SIZE_SEGMENT_CALC_4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 0 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 0 ++// CHECK: store ptr %[[DTYPE_ALLOCATABLE_MEMBER_GEP]], ptr %[[OFFLOAD_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 0 ++// CHECK: store i64 %[[DTYPE_SIZE_SEGMENT_CALC_5]], ptr %[[OFFLOAD_SIZES]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 1 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 1 ++// CHECK: store ptr %[[DTYPE_ALLOCATABLE_MEMBER_GEP]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 2 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 2 ++// CHECK: store ptr %[[ALLOCATABLE_MEMBER_BADDR]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 3 ++// CHECK: store ptr %[[ALLOCATABLE_MEMBER_BADDR]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 3 ++// CHECK: store ptr %[[ARR_OFFSET]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: define void @omp_allocatable_derived_type_member_map(ptr %[[ARG:.*]]) { ++ ++// CHECK: %[[LOCAL_ALLOCATABLE_DTYPE_ALLOCA_2:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, align 8 ++// CHECK: %[[LOCAL_ALLOCATABLE_DTYPE_ALLOCA:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, align 8 ++// CHECK: %[[LOAD_DTYPE_ALLOCATABLE_ARG:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[ARG]], align 8 ++// CHECK: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOAD_DTYPE_ALLOCATABLE_ARG]], ptr %[[LOCAL_ALLOCATABLE_DTYPE_ALLOCA]], align 8 ++// CHECK: %[[DTYPE_ALLOCATABLE_BADDR_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr 
%[[LOCAL_ALLOCATABLE_DTYPE_ALLOCA]], i32 0, i32 0 ++// CHECK: %[[DTYPE_ALLOCATABLE_BADDR_LOAD:.*]] = load ptr, ptr %[[DTYPE_ALLOCATABLE_BADDR_GEP]], align 8 ++// CHECK: %[[DTYPE_ALLOCATABLE_MEMBER_ACCESS:.*]] = getelementptr %_QFtest_allocatable_derived_type_map_operand_and_block_additionTone_layer, ptr %[[DTYPE_ALLOCATABLE_BADDR_LOAD]], i32 0, i32 4 ++// CHECK: %[[DTYPE_ALLOCATABLE_MEMBER_BADDR:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[DTYPE_ALLOCATABLE_MEMBER_ACCESS]], i32 0, i32 0 ++// CHECK: %[[LOAD_DTYPE_ALLOCATABLE_ARG:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[ARG]], align 8 ++// CHECK: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOAD_DTYPE_ALLOCATABLE_ARG]], ptr %[[LOCAL_ALLOCATABLE_DTYPE_ALLOCA_2]], align 8 ++// CHECK: %[[DTYPE_ALLOCATABLE_BADDR_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[LOCAL_ALLOCATABLE_DTYPE_ALLOCA_2]], i32 0, i32 0 ++// CHECK: %[[DTYPE_ALLOCATABLE_BADDR_LOAD:.*]] = load ptr, ptr %[[DTYPE_ALLOCATABLE_BADDR_GEP]], align 8 ++// CHECK: %[[DTYPE_REGULAR_MEMBER_ACCESS:.*]] = getelementptr %_QFtest_allocatable_derived_type_map_operand_and_block_additionTone_layer, ptr %[[DTYPE_ALLOCATABLE_BADDR_LOAD]], i32 0, i32 5 ++// CHECK: %[[DTYPE_ALLOCATABLE_MEMBER_BADDR_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[ARG]], i32 0, i32 0 ++// CHECK: %[[DTYPE_ALLOCATABLE_MEMBER_BADDR_2_LOAD:.*]] = load ptr, ptr %[[DTYPE_ALLOCATABLE_MEMBER_BADDR_2]], align 8 ++// CHECK: %[[DTYPE_ALLOCATABLE_MEMBER_BADDR_LOAD:.*]] = load ptr, ptr %[[DTYPE_ALLOCATABLE_MEMBER_BADDR]], align 8 ++// CHECK: %[[ARR_OFFSET:.*]] = getelementptr inbounds i32, ptr %[[DTYPE_ALLOCATABLE_MEMBER_BADDR_LOAD]], i64 0 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[ARG]], i32 1 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_2:.*]] = ptrtoint ptr %[[DTYPE_SIZE_SEGMENT_CALC_1]] to i64 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_3:.*]] = ptrtoint ptr %[[ARG]] to i64 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_4:.*]] = sub i64 %[[DTYPE_SIZE_SEGMENT_CALC_2]], %[[DTYPE_SIZE_SEGMENT_CALC_3]] ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_5:.*]] = sdiv exact i64 %[[DTYPE_SIZE_SEGMENT_CALC_4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 0 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 0 ++// CHECK: store ptr %[[ARG]], ptr %[[OFFLOAD_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_SIZES:.*]] = getelementptr inbounds [8 x i64], ptr %.offload_sizes, i32 0, i32 0 ++// CHECK: store i64 %[[DTYPE_SIZE_SEGMENT_CALC_5]], ptr %[[OFFLOAD_SIZES]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 1 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 1 ++// CHECK: store ptr %[[ARG]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 2 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 2 ++// CHECK: store ptr %[[DTYPE_ALLOCATABLE_MEMBER_BADDR_2]], ptr 
%[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 3 ++// CHECK: store ptr %[[DTYPE_ALLOCATABLE_MEMBER_BADDR_2]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 3 ++// CHECK: store ptr %[[DTYPE_ALLOCATABLE_MEMBER_BADDR_2_LOAD]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 4 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 4 ++// CHECK: store ptr %[[DTYPE_ALLOCATABLE_MEMBER_ACCESS]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 5 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 5 ++// CHECK: store ptr %[[DTYPE_ALLOCATABLE_MEMBER_BADDR]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 6 ++// CHECK: store ptr %[[DTYPE_ALLOCATABLE_MEMBER_BADDR]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 6 ++// CHECK: store ptr %[[ARR_OFFSET]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 7 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 7 ++// CHECK: store ptr %[[DTYPE_REGULAR_MEMBER_ACCESS]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: define void @omp_alloca_nested_derived_type_map(ptr %[[ARG:.*]]) { ++ ++// CHECK: %[[LOCAL_ALLOCATABLE_DTYPE_ALLOCA_2:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, align 8 ++// CHECK: %[[LOCAL_ALLOCATABLE_DTYPE_ALLOCA:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, align 8 ++// CHECK: %[[LOAD_DTYPE_ALLOCATABLE_ARG:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[ARG]], align 8 ++// CHECK: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOAD_DTYPE_ALLOCATABLE_ARG]], ptr %[[LOCAL_ALLOCATABLE_DTYPE_ALLOCA]], align 8 ++// CHECK: %[[DTYPE_BADDR_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[LOCAL_ALLOCATABLE_DTYPE_ALLOCA]], i32 0, i32 0 ++// CHECK: %[[DTYPE_BADDR_LOAD:.*]] = load ptr, ptr %[[DTYPE_BADDR_GEP]], align 8 ++// CHECK: %[[DTYPE_NESTED_DTYPE_MEMBER_GEP:.*]] = getelementptr %_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTtop_layer, ptr %[[DTYPE_BADDR_LOAD]], i32 0, i32 6 ++// CHECK: %[[DTYPE_NESTED_ALLOCATABLE_MEMBER_GEP:.*]] = getelementptr %_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTmiddle_layer, ptr %[[DTYPE_NESTED_DTYPE_MEMBER_GEP]], i32 0, i32 2 ++// CHECK: %[[DTYPE_NESTED_ALLOCATABLE_MEMBER_BADDR_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[DTYPE_NESTED_ALLOCATABLE_MEMBER_GEP]], i32 0, i32 0 ++// CHECK: %[[LOAD_DTYPE_ALLOCATABLE_ARG:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[ARG]], align 8 ++// CHECK: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOAD_DTYPE_ALLOCATABLE_ARG]], ptr %[[LOCAL_ALLOCATABLE_DTYPE_ALLOCA_2]], 
align 8 ++// CHECK: %[[DTYPE_BADDR_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[LOCAL_ALLOCATABLE_DTYPE_ALLOCA_2]], i32 0, i32 0 ++// CHECK: %[[DTYPE_BADDR_LOAD:.*]] = load ptr, ptr %[[DTYPE_BADDR_GEP]], align 8 ++// CHECK: %[[DTYPE_NESTED_DTYPE_MEMBER_GEP:.*]] = getelementptr %_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTtop_layer, ptr %[[DTYPE_BADDR_LOAD]], i32 0, i32 6 ++// CHECK: %[[DTYPE_NESTED_REGULAR_MEMBER_GEP:.*]] = getelementptr %_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTmiddle_layer, ptr %[[DTYPE_NESTED_DTYPE_MEMBER_GEP]], i32 0, i32 3 ++// CHECK: %[[DTYPE_ALLOCATABLE_BADDR_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[ARG]], i32 0, i32 0 ++// CHECK: %[[DTYPE_ALLOCATABLE_BADDR_LOAD:.*]] = load ptr, ptr %[[DTYPE_ALLOCATABLE_BADDR_GEP]], align 8 ++// CHECK: %[[DTYPE_NESTED_ALLOCATABLE_MEMBER_BADDR_LOAD:.*]] = load ptr, ptr %[[DTYPE_NESTED_ALLOCATABLE_MEMBER_BADDR_GEP]], align 8 ++// CHECK: %[[ARR_OFFSET:.*]] = getelementptr inbounds i32, ptr %[[DTYPE_NESTED_ALLOCATABLE_MEMBER_BADDR_LOAD]], i64 0 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[ARG]], i32 1 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_2:.*]] = ptrtoint ptr %[[DTYPE_SIZE_SEGMENT_CALC_1]] to i64 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_3:.*]] = ptrtoint ptr %[[ARG]] to i64 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_4:.*]] = sub i64 %[[DTYPE_SIZE_SEGMENT_CALC_2]], %[[DTYPE_SIZE_SEGMENT_CALC_3]] ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_5:.*]] = sdiv exact i64 %[[DTYPE_SIZE_SEGMENT_CALC_4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 0 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 0 ++// CHECK: store ptr %[[ARG]], ptr %[[OFFLOAD_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_SIZES:.*]] = getelementptr inbounds [8 x i64], ptr %.offload_sizes, i32 0, i32 0 ++// CHECK: store i64 %[[DTYPE_SIZE_SEGMENT_CALC_5]], ptr %[[OFFLOAD_SIZES]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 1 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 1 ++// CHECK: store ptr %[[ARG]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 2 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 2 ++// CHECK: store ptr %[[DTYPE_ALLOCATABLE_BADDR_GEP]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 3 ++// CHECK: store ptr %[[DTYPE_ALLOCATABLE_BADDR_GEP]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 3 ++// CHECK: store ptr %[[DTYPE_ALLOCATABLE_BADDR_LOAD]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 4 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr 
%.offload_ptrs, i32 0, i32 4 ++// CHECK: store ptr %[[DTYPE_NESTED_ALLOCATABLE_MEMBER_GEP]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 5 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 5 ++// CHECK: store ptr %[[DTYPE_NESTED_ALLOCATABLE_MEMBER_BADDR_GEP]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 6 ++// CHECK: store ptr %[[DTYPE_NESTED_ALLOCATABLE_MEMBER_BADDR_GEP]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 6 ++// CHECK: store ptr %[[ARR_OFFSET]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 7 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 7 ++// CHECK: store ptr %[[DTYPE_NESTED_REGULAR_MEMBER_GEP]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: define void @omp_nested_derived_type_alloca_map(ptr %[[ARG:.*]]) { ++ ++// CHECK: %[[NESTED_DTYPE_MEMBER_GEP:.*]] = getelementptr %_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTtop_layer, ptr %[[ARG]], i32 0, i32 6 ++// CHECK: %[[NESTED_ALLOCATABLE_MEMBER_GEP:.*]] = getelementptr %_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer, ptr %[[NESTED_DTYPE_MEMBER_GEP]], i32 0, i32 2 ++// CHECK: %[[NESTED_ALLOCATABLE_MEMBER_BADDR_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]], i32 0, i32 0 ++// CHECK: %[[NESTED_ALLOCATABLE_MEMBER_BADDR_LOAD:.*]] = load ptr, ptr %[[NESTED_ALLOCATABLE_MEMBER_BADDR_GEP]], align 8 ++// CHECK: %[[ARR_OFFSET:.*]] = getelementptr inbounds i32, ptr %[[NESTED_ALLOCATABLE_MEMBER_BADDR_LOAD]], i64 0 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_1:.*]] = getelementptr i32, ptr %[[ARR_OFFSET]], i64 1 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_2:.*]] = ptrtoint ptr %[[DTYPE_SIZE_SEGMENT_CALC_1]] to i64 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_3:.*]] = ptrtoint ptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]] to i64 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_4:.*]] = sub i64 %[[DTYPE_SIZE_SEGMENT_CALC_2]], %[[DTYPE_SIZE_SEGMENT_CALC_3]] ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_5:.*]] = sdiv exact i64 %[[DTYPE_SIZE_SEGMENT_CALC_4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 0 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 0 ++// CHECK: store ptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]], ptr %[[OFFLOAD_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 0 ++// CHECK: store i64 %[[DTYPE_SIZE_SEGMENT_CALC_5]], ptr %[[OFFLOAD_SIZES]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 1 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 1 ++// CHECK: store ptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]], ptr %[[OFFLOAD_PTRS]], 
align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 2 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 2 ++// CHECK: store ptr %[[NESTED_ALLOCATABLE_MEMBER_BADDR_GEP]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 3 ++// CHECK: store ptr %[[NESTED_ALLOCATABLE_MEMBER_BADDR_GEP]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 3 ++// CHECK: store ptr %[[ARR_OFFSET]], ptr %[[OFFLOAD_PTRS]], align 8 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-host-eval.mlir llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-host-eval.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-host-eval.mlir 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-host-eval.mlir 2024-11-23 20:39:47.204175279 -0600 +@@ -0,0 +1,46 @@ ++// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s ++ ++module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} { ++ llvm.func @omp_target_region_() { ++ %out_teams = llvm.mlir.constant(1000 : i32) : i32 ++ %out_threads = llvm.mlir.constant(2000 : i32) : i32 ++ %out_lb = llvm.mlir.constant(0 : i32) : i32 ++ %out_ub = llvm.mlir.constant(3000 : i32) : i32 ++ %out_step = llvm.mlir.constant(1 : i32) : i32 ++ ++ omp.target ++ host_eval(%out_teams -> %teams, %out_threads -> %threads, ++ %out_lb -> %lb, %out_ub -> %ub, %out_step -> %step : ++ i32, i32, i32, i32, i32) { ++ omp.teams num_teams(to %teams : i32) thread_limit(%threads : i32) { ++ omp.parallel { ++ omp.distribute { ++ omp.wsloop { ++ omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) { ++ omp.yield ++ } ++ } {omp.composite} ++ } {omp.composite} ++ omp.terminator ++ } {omp.composite} ++ omp.terminator ++ } ++ omp.terminator ++ } ++ llvm.return ++ } ++} ++ ++// CHECK-LABEL: define void @omp_target_region_ ++// CHECK: %[[ARGS:.*]] = alloca %struct.__tgt_kernel_arguments ++ ++// CHECK: %[[TRIPCOUNT_ADDR:.*]] = getelementptr inbounds nuw %struct.__tgt_kernel_arguments, ptr %[[ARGS]], i32 0, i32 8 ++// CHECK: store i64 3000, ptr %[[TRIPCOUNT_ADDR]] ++ ++// CHECK: %[[TEAMS_ADDR:.*]] = getelementptr inbounds nuw %struct.__tgt_kernel_arguments, ptr %[[ARGS]], i32 0, i32 10 ++// CHECK: store [3 x i32] [i32 1000, i32 0, i32 0], ptr %[[TEAMS_ADDR]] ++ ++// CHECK: %[[THREADS_ADDR:.*]] = getelementptr inbounds nuw %struct.__tgt_kernel_arguments, ptr %[[ARGS]], i32 0, i32 11 ++// CHECK: store [3 x i32] [i32 2000, i32 0, i32 0], ptr %[[THREADS_ADDR]] ++ ++// CHECK: call i32 @__tgt_target_kernel(ptr @{{.*}}, i64 {{.*}}, i32 1000, i32 2000, ptr @{{.*}}, ptr %[[ARGS]]) +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir 2024-10-18 17:40:33.936977609 -0500 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir 2024-11-23 20:39:47.204175279 -0600 +@@ -52,7 +52,7 @@ } // CHECK: define weak_odr protected amdgpu_kernel void @[[FUNC0:.*]]( @@ -14565,7 +14640,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge // CHECK: %[[TMP1:.*]] = 
alloca [1 x ptr], align 8, addrspace(5) // CHECK: %[[TMP2:.*]] = addrspacecast ptr addrspace(5) %[[TMP1]] to ptr // CHECK: %[[STRUCTARG:.*]] = alloca { ptr }, align 8, addrspace(5) -@@ -68,8 +68,8 @@ +@@ -65,8 +65,8 @@ // CHECK: br i1 %[[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[WORKER_EXIT:.*]] // CHECK: %[[TMP6:.*]] = load ptr, ptr %[[TMP4]], align 8 // CHECK: %[[OMP_GLOBAL_THREAD_NUM:.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr)) @@ -14576,7 +14651,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge // CHECK: %[[TMP7:.*]] = getelementptr inbounds [1 x ptr], ptr %[[TMP2]], i64 0, i64 0 // CHECK: store ptr %[[STRUCTARG_ASCAST]], ptr %[[TMP7]], align 8 // CHECK: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 1, i32 -1, i32 -1, ptr @[[FUNC1:.*]], ptr null, ptr %[[TMP2]], i64 1) -@@ -99,7 +99,7 @@ +@@ -96,7 +96,7 @@ // is passed as a param to kmpc_parallel_51 function // CHECK: define weak_odr protected amdgpu_kernel void @{{.*}}( @@ -14585,9 +14660,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge // CHECK: store ptr %[[IFCOND_ARG2]], ptr %[[IFCOND_TMP1:.*]], align 8 // CHECK: %[[IFCOND_TMP2:.*]] = load i32, ptr %[[IFCOND_TMP1]], align 4 // CHECK: %[[IFCOND_TMP3:.*]] = icmp ne i32 %[[IFCOND_TMP2]], 0 -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir llvm-project/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir ---- llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir 2024-06-12 10:43:15.400181266 -0500 -+++ llvm-project/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir 2024-06-12 10:44:09.359614154 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir 2024-10-18 17:40:33.936977609 -0500 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir 2024-11-23 20:39:47.204175279 -0600 @@ -4,10 +4,11 @@ // for nested omp do loop inside omp target region @@ -14603,7 +14678,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge omp.parallel { %loop_ub = llvm.mlir.constant(9 : i32) : i32 %loop_lb = llvm.mlir.constant(0 : i32) : i32 -@@ -37,14 +38,14 @@ +@@ -36,14 +37,14 @@ // CHECK-SAME: ptr %[[ARG_PTR:.*]]) // CHECK-SAME: #[[ATTRS1:[0-9]+]] // CHECK: call void @__kmpc_for_static_loop_4u(ptr addrspacecast (ptr addrspace(1) @[[GLOB]] to ptr), @@ -14621,9 +14696,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge +// CHECK: attributes #[[ATTRS2]] = { // CHECK-SAME: "target-cpu"="gfx90a" // CHECK-SAME: "target-features"="+gfx9-insts,+wavefrontsize64" -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir llvm-project/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir ---- llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir 2024-06-12 10:43:15.400181266 -0500 -+++ llvm-project/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir 2024-06-12 10:44:09.359614154 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir 
llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir 2024-10-18 17:40:33.936977609 -0500 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir 2024-11-23 20:39:47.204175279 -0600 @@ -1,6 +1,6 @@ // RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s @@ -14632,7 +14707,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge llvm.func @omp_target_region_() { %0 = llvm.mlir.constant(20 : i32) : i32 %1 = llvm.mlir.constant(10 : i32) : i32 -@@ -30,8 +30,8 @@ +@@ -29,8 +29,8 @@ // CHECK: @[[SRC_LOC:.*]] = private unnamed_addr constant [23 x i8] c"{{[^"]*}}", align 1 // CHECK: @[[IDENT:.*]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[SRC_LOC]] }, align 8 // CHECK: @[[DYNA_ENV:.*]] = weak_odr protected global %struct.DynamicEnvironmentTy zeroinitializer @@ -14643,9 +14718,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge // CHECK: %[[TMP_A:.*]] = alloca ptr, align 8 // CHECK: store ptr %[[ADDR_A]], ptr %[[TMP_A]], align 8 // CHECK: %[[TMP_B:.*]] = alloca ptr, align 8 -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir llvm-project/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir ---- llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir 2024-06-12 10:43:15.400181266 -0500 -+++ llvm-project/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir 2024-06-12 10:44:09.359614154 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir 2024-10-18 17:40:33.936977609 -0500 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir 2024-11-23 20:39:47.204175279 -0600 @@ -1,6 +1,6 @@ // RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s @@ -14654,16 +14729,16 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge llvm.func @omp_target_region_() { %0 = llvm.mlir.constant(20 : i32) : i32 %1 = llvm.mlir.constant(10 : i32) : i32 -@@ -37,5 +37,5 @@ +@@ -36,5 +36,5 @@ } } -// CHECK: define weak_odr protected void @__omp_offloading_{{.*}}_{{.*}}_omp_target_region__l19 +// CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_{{.*}}_omp_target_region__l19 // CHECK: ret void -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir llvm-project/mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir ---- llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir 2024-06-12 10:43:15.400181266 -0500 -+++ llvm-project/mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir 2024-06-12 10:44:09.359614154 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir 2024-08-27 20:36:32.060104964 -0500 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir 2024-11-23 20:39:47.204175279 -0600 @@ -5,7 +5,9 @@ module attributes {omp.is_target_device = true} { @@ -14675,9 +14750,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge 
omp.teams { llvm.call @foo(%arg0) : (i32) -> () omp.terminator -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir llvm-project/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir ---- llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir 2024-06-12 10:43:15.400181266 -0500 -+++ llvm-project/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir 2024-06-12 10:44:09.359614154 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir 2024-10-18 17:40:33.936977609 -0500 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir 2024-11-23 20:39:47.204175279 -0600 @@ -4,7 +4,9 @@ // for nested omp do loop with collapse clause inside omp target region @@ -14689,7 +14764,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge %loop_ub = llvm.mlir.constant(99 : i32) : i32 %loop_lb = llvm.mlir.constant(0 : i32) : i32 %loop_step = llvm.mlir.constant(1 : index) : i32 -@@ -25,7 +27,7 @@ +@@ -24,7 +26,7 @@ // CHECK: define void @[[FUNC_COLLAPSED_WSLOOP:.*]](ptr %[[ARG0:.*]]) // CHECK: call void @__kmpc_for_static_loop_4u(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), @@ -14698,9 +14773,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge // CHECK-SAME: i32 %[[NUM_THREADS:.*]], i32 0) // CHECK: define internal void @[[COLLAPSED_WSLOOP_BODY_FN]](i32 %[[LOOP_CNT:.*]], ptr %[[LOOP_BODY_ARG:.*]]) -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir llvm-project/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir ---- llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir 2024-06-12 10:43:15.400181266 -0500 -+++ llvm-project/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir 2024-06-12 10:44:09.359614154 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir 2024-10-18 17:40:33.936977609 -0500 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir 2024-11-23 20:39:47.204175279 -0600 @@ -4,7 +4,9 @@ // for nested omp do loop inside omp target region @@ -14712,7 +14787,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge %loop_ub = llvm.mlir.constant(9 : i32) : i32 %loop_lb = llvm.mlir.constant(0 : i32) : i32 %loop_step = llvm.mlir.constant(1 : i32) : i32 -@@ -19,7 +21,9 @@ +@@ -18,7 +20,9 @@ llvm.return } @@ -14723,7 +14798,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge %loop_ub = llvm.mlir.constant(9 : i32) : i32 %loop_lb = llvm.mlir.constant(0 : i32) : i32 %loop_step = llvm.mlir.constant(1 : i32) : i32 -@@ -36,10 +40,10 @@ +@@ -34,10 +38,10 @@ // CHECK: define void @[[FUNC0:.*]](ptr %[[ARG0:.*]]) // CHECK: %[[STRUCTARG:.*]] = alloca { ptr }, align 8, addrspace(5) // CHECK: %[[STRUCTARG_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[STRUCTARG]] to ptr @@ -14737,7 +14812,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge // CHECK: define internal void @[[LOOP_BODY_FN]](i32 %[[LOOP_CNT:.*]], ptr %[[LOOP_BODY_ARG:.*]]) // CHECK: %[[GEP2:.*]] = getelementptr { ptr }, ptr 
%[[LOOP_BODY_ARG]], i32 0, i32 0 -@@ -48,6 +52,6 @@ +@@ -46,6 +50,6 @@ // CHECK: store i32 %[[VAL0:.*]], ptr %[[GEP3]], align 4 // CHECK: define void @[[FUNC_EMPTY_WSLOOP:.*]]() @@ -14745,10 +14820,10 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge +// CHECK: call void @__kmpc_for_static_loop_4u(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), ptr @[[LOOP_EMPTY_BODY_FN:.*]], ptr null, i32 9, i32 %[[NUM_THREADS:.*]], i32 0) // CHECK: define internal void @[[LOOP_EMPTY_BODY_FN]](i32 %[[LOOP_CNT:.*]]) -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/openmp-llvm.mlir llvm-project/mlir/test/Target/LLVMIR/openmp-llvm.mlir ---- llvm-project.orig/mlir/test/Target/LLVMIR/openmp-llvm.mlir 2024-06-12 10:43:15.400181266 -0500 -+++ llvm-project/mlir/test/Target/LLVMIR/openmp-llvm.mlir 2024-06-12 10:44:09.359614154 -0500 -@@ -699,7 +699,7 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/openmp-llvm.mlir llvm-project-aso/mlir/test/Target/LLVMIR/openmp-llvm.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/openmp-llvm.mlir 2024-11-14 15:28:41.946639261 -0600 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/openmp-llvm.mlir 2024-11-23 20:39:47.204175279 -0600 +@@ -700,7 +700,7 @@ // CHECK-LABEL: @simd_simple_multiple llvm.func @simd_simple_multiple(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) { omp.simd { @@ -14757,47 +14832,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/openmp-l %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32 // The form of the emitted IR is controlled by OpenMPIRBuilder and // tested there. Just check that the right metadata is added and collapsed -@@ -726,6 +726,7 @@ - llvm.store %3, %5 : f32, !llvm.ptr - omp.yield - } -+ omp.terminator - } - llvm.return - } -@@ -749,6 +750,7 @@ - llvm.store %3, %5 : f32, !llvm.ptr - omp.yield - } -+ omp.terminator - } - llvm.return - } -@@ -769,6 +771,7 @@ - llvm.store %3, %5 : f32, !llvm.ptr - omp.yield - } -+ omp.terminator - } - llvm.return - } -@@ -788,6 +791,7 @@ - llvm.store %3, %5 : f32, !llvm.ptr - omp.yield - } -+ omp.terminator - } - llvm.return - } -@@ -816,6 +820,7 @@ - llvm.store %arg2, %1 : i32, !llvm.ptr - omp.yield - } -+ omp.terminator - } - llvm.return - } -@@ -2178,7 +2183,7 @@ +@@ -2323,7 +2323,7 @@ // CHECK: [[SECTION3]]: // CHECK: br label %[[REGION3:[^ ,]*]] // CHECK: [[REGION3]]: @@ -14806,23 +14841,42 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/openmp-l %add = llvm.add %arg0, %arg1 : i32 // CHECK: store i32 %{{.*}}, ptr %{{.*}}, align 4 // CHECK: br label %{{.*}} -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/openmp-reduction.mlir llvm-project/mlir/test/Target/LLVMIR/openmp-reduction.mlir ---- llvm-project.orig/mlir/test/Target/LLVMIR/openmp-reduction.mlir 2024-06-12 10:43:15.400181266 -0500 -+++ llvm-project/mlir/test/Target/LLVMIR/openmp-reduction.mlir 2024-06-12 10:44:09.363614112 -0500 -@@ -565,8 +565,8 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/openmp-reduction.mlir llvm-project-aso/mlir/test/Target/LLVMIR/openmp-reduction.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/openmp-reduction.mlir 2024-11-06 08:35:35.855248041 -0600 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/openmp-reduction.mlir 2024-11-23 20:39:47.204175279 -0600 +@@ -559,7 +559,7 @@ // CHECK: define internal void @[[OUTLINED]] // Private reduction 
variable and its initialization. -// CHECK: %[[PRIVATE:[0-9]+]] = alloca i32 --// CHECK: store i32 0, ptr %[[PRIVATE]] +// CHECK: %[[PRIVATE:private_redvar]] = alloca i32 -+// CHECK-NEXT: store i32 0, ptr %[[PRIVATE]] + // CHECK: store i32 0, ptr %[[PRIVATE]] // Loop exit: - // CHECK: call void @__kmpc_barrier -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir llvm-project/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir ---- llvm-project.orig/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir 2024-06-12 10:43:15.400181266 -0500 -+++ llvm-project/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir 2024-06-12 10:44:09.363614112 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/openmp-target-use-device-nested.mlir llvm-project-aso/mlir/test/Target/LLVMIR/openmp-target-use-device-nested.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/openmp-target-use-device-nested.mlir 2024-10-18 17:40:33.940977568 -0500 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/openmp-target-use-device-nested.mlir 2024-11-23 20:39:47.204175279 -0600 +@@ -3,7 +3,7 @@ + // This tests check that target code nested inside a target data region which + // has only use_device_ptr mapping corectly generates code on the device pass. + +-// CHECK: define weak_odr protected void @__omp_offloading{{.*}}main_ ++// CHECK: define weak_odr protected {{.*}} void @__omp_offloading{{.*}}main_ + // CHECK-NEXT: entry: + // CHECK-NEXT: %[[VAL_3:.*]] = alloca ptr, align 8 + // CHECK-NEXT: store ptr %[[VAL_4:.*]], ptr %[[VAL_3]], align 8 +@@ -17,7 +17,7 @@ + // CHECK-NEXT: %[[VAL_13:.*]] = load ptr, ptr %[[VAL_11]], align 8 + // CHECK-NEXT: store i32 999, ptr %[[VAL_13]], align 4 + // CHECK-NEXT: br label %[[VAL_14:.*]] +-module attributes {omp.is_target_device = true } { ++module attributes {omp.is_target_device = true, llvm.target_triple = "amdgcn-amd-amdhsa"} { + llvm.func @_QQmain() attributes {fir.bindc_name = "main"} { + %0 = llvm.mlir.constant(1 : i64) : i64 + %a = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir llvm-project-aso/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir 2024-10-18 17:40:33.940977568 -0500 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir 2024-11-23 20:39:47.204175279 -0600 @@ -3,7 +3,7 @@ // This tests the fix for https://github.com/llvm/llvm-project/issues/84606 // We are only interested in ensuring that the -mlir-to-llmvir pass doesn't crash. 
@@ -14832,20 +14886,162 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/openmp-t llvm.func @_QQmain() attributes {fir.bindc_name = "main", omp.declare_target = #omp.declaretarget} { %0 = llvm.mlir.constant(0 : i32) : i32 %1 = llvm.mlir.constant(1 : i64) : i64 -diff -Naur -x .git -x '*.pyc' llvm-project.orig/offload/test/offloading/fortran/target_private.f90 llvm-project/offload/test/offloading/fortran/target_private.f90 ---- llvm-project.orig/offload/test/offloading/fortran/target_private.f90 1969-12-31 18:00:00.000000000 -0600 -+++ llvm-project/offload/test/offloading/fortran/target_private.f90 2024-06-12 10:44:09.363614112 -0500 -@@ -0,0 +1,29 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/openmp-todo.mlir llvm-project-aso/mlir/test/Target/LLVMIR/openmp-todo.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/openmp-todo.mlir 2024-11-14 15:28:41.946639261 -0600 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/openmp-todo.mlir 2024-11-23 20:39:47.204175279 -0600 +@@ -66,10 +66,55 @@ + + // ----- + +-llvm.func @distribute(%lb : i32, %ub : i32, %step : i32) { +- // expected-error@below {{not yet implemented: omp.distribute}} ++llvm.func @distribute_allocate(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) { ++ // expected-error@below {{not yet implemented: Unhandled clause allocate in omp.distribute operation}} + // expected-error@below {{LLVM Translation failed for operation: omp.distribute}} +- omp.distribute { ++ omp.distribute allocate(%x : !llvm.ptr -> %x : !llvm.ptr) { ++ omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) { ++ omp.yield ++ } ++ } ++ llvm.return ++} ++ ++// ----- ++ ++llvm.func @distribute_dist_schedule(%lb : i32, %ub : i32, %step : i32, %x : i32) { ++ // expected-error@below {{not yet implemented: Unhandled clause dist_schedule in omp.distribute operation}} ++ // expected-error@below {{LLVM Translation failed for operation: omp.distribute}} ++ omp.distribute dist_schedule_static dist_schedule_chunk_size(%x : i32) { ++ omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) { ++ omp.yield ++ } ++ } ++ llvm.return ++} ++ ++// ----- ++ ++llvm.func @distribute_order(%lb : i32, %ub : i32, %step : i32) { ++ // expected-error@below {{not yet implemented: Unhandled clause order in omp.distribute operation}} ++ // expected-error@below {{LLVM Translation failed for operation: omp.distribute}} ++ omp.distribute order(concurrent) { ++ omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) { ++ omp.yield ++ } ++ } ++ llvm.return ++} ++ ++// ----- ++ ++omp.private {type = private} @x.privatizer : !llvm.ptr alloc { ++^bb0(%arg0: !llvm.ptr): ++ %0 = llvm.mlir.constant(1 : i32) : i32 ++ %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr ++ omp.yield(%1 : !llvm.ptr) ++} ++llvm.func @distribute_private(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) { ++ // expected-error@below {{not yet implemented: Unhandled clause privatization in omp.distribute operation}} ++ // expected-error@below {{LLVM Translation failed for operation: omp.distribute}} ++ omp.distribute private(@x.privatizer %x -> %arg0 : !llvm.ptr) { + omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) { + omp.yield + } +@@ -278,17 +323,6 @@ + + // ----- + +-llvm.func @target_if(%x : i1) { +- // expected-error@below {{not yet implemented: Unhandled clause if in omp.target operation}} +- // expected-error@below {{LLVM Translation failed for operation: omp.target}} +- omp.target if(%x) { +- omp.terminator +- } +- llvm.return +-} +- +-// ----- +- + 
omp.declare_reduction @add_f32 : f32 + init { + ^bb0(%arg: f32): +@@ -364,17 +398,6 @@ + + // ----- + +-llvm.func @target_thread_limit(%x : i32) { +- // expected-error@below {{not yet implemented: Unhandled clause thread_limit in omp.target operation}} +- // expected-error@below {{LLVM Translation failed for operation: omp.target}} +- omp.target thread_limit(%x : i32) { +- omp.terminator +- } +- llvm.return +-} +- +-// ----- +- + llvm.func @target_enter_data_depend(%x: !llvm.ptr) { + // expected-error@below {{not yet implemented: Unhandled clause depend in omp.target_enter_data operation}} + // expected-error@below {{LLVM Translation failed for operation: omp.target_enter_data}} +@@ -578,34 +601,6 @@ + omp.terminator + } + llvm.return +-} +- +-// ----- +- +-omp.declare_reduction @add_f32 : f32 +-init { +-^bb0(%arg: f32): +- %0 = llvm.mlir.constant(0.0 : f32) : f32 +- omp.yield (%0 : f32) +-} +-combiner { +-^bb1(%arg0: f32, %arg1: f32): +- %1 = llvm.fadd %arg0, %arg1 : f32 +- omp.yield (%1 : f32) +-} +-atomic { +-^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr): +- %2 = llvm.load %arg3 : !llvm.ptr -> f32 +- llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32 +- omp.yield +-} +-llvm.func @teams_reduction(%x : !llvm.ptr) { +- // expected-error@below {{not yet implemented: Unhandled clause reduction in omp.teams operation}} +- // expected-error@below {{LLVM Translation failed for operation: omp.teams}} +- omp.teams reduction(@add_f32 %x -> %prv : !llvm.ptr) { +- omp.terminator +- } +- llvm.return + } + + // ----- +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/offload/CMakeLists.txt llvm-project-aso/offload/CMakeLists.txt +--- llvm-project-aso-orig/offload/CMakeLists.txt 2024-11-14 15:28:41.950639246 -0600 ++++ llvm-project-aso/offload/CMakeLists.txt 2024-11-23 20:39:47.204175279 -0600 +@@ -101,9 +101,9 @@ + + # Check for flang + if (NOT MSVC) +- set(OPENMP_TEST_Fortran_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/flang) ++ set(OPENMP_TEST_Fortran_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/flang-new) + else() +- set(OPENMP_TEST_Fortran_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/flang.exe) ++ set(OPENMP_TEST_Fortran_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/flang-new.exe) + endif() + + # Set fortran test compiler if flang is found +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/offload/test/offloading/fortran/target_private.f90 llvm-project-aso/offload/test/offloading/fortran/target_private.f90 +--- llvm-project-aso-orig/offload/test/offloading/fortran/target_private.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/offload/test/offloading/fortran/target_private.f90 2024-11-23 20:39:47.204175279 -0600 +@@ -0,0 +1,23 @@ +! Basic offloading test with a target region -+! REQUIRES: flang -+! UNSUPPORTED: nvptx64-nvidia-cuda-LTO -+! UNSUPPORTED: aarch64-unknown-linux-gnu -+! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO -+! UNSUPPORTED: x86_64-pc-linux-gnu -+! UNSUPPORTED: x86_64-pc-linux-gnu-LTO -+ -+! RUN: %libomptarget-compile-fortran-generic -+! RUN: env LIBOMPTARGET_INFO=16 %libomptarget-run-generic 2>&1 | %fcheck-generic ++! REQUIRES: flang, amdgpu ++ ++! RUN: %libomptarget-compile-fortran-run-and-check-generic +program target_update + implicit none + integer :: x(1) @@ -14862,13 +15058,28 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/offload/test/offloading/fortran/ + print *, "y =", y(1) + +end program target_update -+! CHECK: "PluginInterface" device {{[0-9]+}} info: Launching kernel {{.*}} ++ +! CHECK: x = 42 +! 
CHECK: y = 84 -diff -Naur -x .git -x '*.pyc' llvm-project.orig/openmp/runtime/src/CMakeLists.txt llvm-project/openmp/runtime/src/CMakeLists.txt ---- llvm-project.orig/openmp/runtime/src/CMakeLists.txt 2024-06-12 10:43:15.448180762 -0500 -+++ llvm-project/openmp/runtime/src/CMakeLists.txt 2024-06-12 10:44:09.363614112 -0500 -@@ -147,6 +147,11 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/openmp/CMakeLists.txt llvm-project-aso/openmp/CMakeLists.txt +--- llvm-project-aso-orig/openmp/CMakeLists.txt 2024-10-18 17:40:33.952977445 -0500 ++++ llvm-project-aso/openmp/CMakeLists.txt 2024-11-23 20:39:47.204175279 -0600 +@@ -79,9 +79,9 @@ + + # Check for flang + if (NOT MSVC) +- set(OPENMP_TEST_Fortran_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/flang) ++ set(OPENMP_TEST_Fortran_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/flang-new) + else() +- set(OPENMP_TEST_Fortran_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/flang.exe) ++ set(OPENMP_TEST_Fortran_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/flang-new.exe) + endif() + + # Set fortran test compiler if flang is found +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/openmp/runtime/src/CMakeLists.txt llvm-project-aso/openmp/runtime/src/CMakeLists.txt +--- llvm-project-aso-orig/openmp/runtime/src/CMakeLists.txt 2024-11-23 20:25:27.515272748 -0600 ++++ llvm-project-aso/openmp/runtime/src/CMakeLists.txt 2024-11-23 20:39:47.204175279 -0600 +@@ -153,6 +153,11 @@ set_source_files_properties(${LIBOMP_CXXFILES} PROPERTIES COMPILE_FLAGS "${LIBOMP_CONFIGURED_CXXFLAGS}") set_source_files_properties(${LIBOMP_ASMFILES} ${LIBOMP_GNUASMFILES} PROPERTIES COMPILE_FLAGS "${LIBOMP_CONFIGURED_ASMFLAGS}") diff --git a/trunk/patches/ATD_full.patch b/trunk/patches/ATD_full.patch index 79fd32dca..173a45fd3 100644 --- a/trunk/patches/ATD_full.patch +++ b/trunk/patches/ATD_full.patch @@ -1,6 +1,6 @@ -diff -Naur -x .git llvm-project.upstream/clang/include/clang/Basic/DiagnosticDriverKinds.td llvm-project/clang/include/clang/Basic/DiagnosticDriverKinds.td ---- llvm-project.upstream/clang/include/clang/Basic/DiagnosticDriverKinds.td 2024-11-19 12:55:58.281826424 -0500 -+++ llvm-project/clang/include/clang/Basic/DiagnosticDriverKinds.td 2024-11-19 12:49:04.556151210 -0500 +diff -Naur -x .git llvm-project-trunk/clang/include/clang/Basic/DiagnosticDriverKinds.td llvm-project-trunk-atd/clang/include/clang/Basic/DiagnosticDriverKinds.td +--- llvm-project-trunk/clang/include/clang/Basic/DiagnosticDriverKinds.td 2024-11-23 20:25:45.999206301 -0600 ++++ llvm-project-trunk-atd/clang/include/clang/Basic/DiagnosticDriverKinds.td 2024-11-23 20:26:13.503107411 -0600 @@ -146,9 +146,6 @@ def warn_drv_unsupported_openmp_library : Warning< "the library '%0=%1' is not supported, OpenMP will not be enabled">, @@ -11,9 +11,9 @@ diff -Naur -x .git llvm-project.upstream/clang/include/clang/Basic/DiagnosticDri def err_drv_invalid_thread_model_for_target : Error< "invalid thread model '%0' in '%1' for this target">; -diff -Naur -x .git llvm-project.upstream/clang/include/clang/Basic/DiagnosticGroups.td llvm-project/clang/include/clang/Basic/DiagnosticGroups.td ---- llvm-project.upstream/clang/include/clang/Basic/DiagnosticGroups.td 2024-11-19 12:54:59.881016873 -0500 -+++ llvm-project/clang/include/clang/Basic/DiagnosticGroups.td 2024-11-19 12:49:04.556151210 -0500 +diff -Naur -x .git llvm-project-trunk/clang/include/clang/Basic/DiagnosticGroups.td llvm-project-trunk-atd/clang/include/clang/Basic/DiagnosticGroups.td +--- llvm-project-trunk/clang/include/clang/Basic/DiagnosticGroups.td 2024-11-23 
20:30:46.590124482 -0600 ++++ llvm-project-trunk-atd/clang/include/clang/Basic/DiagnosticGroups.td 2024-10-29 11:08:03.513484408 -0500 @@ -1584,7 +1584,3 @@ // Warnings about using the non-standard extension having an explicit specialization // with a storage class specifier. @@ -22,10 +22,10 @@ diff -Naur -x .git llvm-project.upstream/clang/include/clang/Basic/DiagnosticGro -// A warning for options that enable a feature that is not yet complete -def ExperimentalOption : DiagGroup<"experimental-option">; - -diff -Naur -x .git llvm-project.upstream/clang/include/clang/Driver/Options.td llvm-project/clang/include/clang/Driver/Options.td ---- llvm-project.upstream/clang/include/clang/Driver/Options.td 2024-11-19 12:55:58.290826394 -0500 -+++ llvm-project/clang/include/clang/Driver/Options.td 2024-11-19 12:49:04.571151163 -0500 -@@ -6112,7 +6112,7 @@ +diff -Naur -x .git llvm-project-trunk/clang/include/clang/Driver/Options.td llvm-project-trunk-atd/clang/include/clang/Driver/Options.td +--- llvm-project-trunk/clang/include/clang/Driver/Options.td 2024-11-23 20:30:46.598124454 -0600 ++++ llvm-project-trunk-atd/clang/include/clang/Driver/Options.td 2024-11-23 20:26:13.515107367 -0600 +@@ -6118,7 +6118,7 @@ def _sysroot : Separate<["--"], "sysroot">, Alias<_sysroot_EQ>; //===----------------------------------------------------------------------===// @@ -34,7 +34,7 @@ diff -Naur -x .git llvm-project.upstream/clang/include/clang/Driver/Options.td l //===----------------------------------------------------------------------===// let Visibility = [ClangOption, FlangOption] in { -@@ -6128,7 +6128,7 @@ +@@ -6134,7 +6134,7 @@ } // let Vis = [Default, FlangOption] //===----------------------------------------------------------------------===// @@ -43,7 +43,7 @@ diff -Naur -x .git llvm-project.upstream/clang/include/clang/Driver/Options.td l //===----------------------------------------------------------------------===// let Flags = [TargetSpecific] in { let Visibility = [ClangOption, FlangOption] in { -@@ -6876,6 +6876,7 @@ +@@ -6882,6 +6882,7 @@ defm logical_abbreviations : OptInFC1FFlag<"logical-abbreviations", "Enable logical abbreviations">; defm implicit_none : OptInFC1FFlag<"implicit-none", "No implicit typing allowed unless overridden by IMPLICIT statements">; defm underscoring : OptInFC1FFlag<"underscoring", "Appends one trailing underscore to external names">; @@ -51,7 +51,7 @@ diff -Naur -x .git llvm-project.upstream/clang/include/clang/Driver/Options.td l defm ppc_native_vec_elem_order: BoolOptionWithoutMarshalling<"f", "ppc-native-vector-element-order", PosFlag, NegFlag>; -@@ -6892,6 +6893,10 @@ +@@ -6898,6 +6899,10 @@ def fhermetic_module_files : Flag<["-"], "fhermetic-module-files">, Group, HelpText<"Emit hermetic module files (no nested USE association)">; @@ -62,7 +62,7 @@ diff -Naur -x .git llvm-project.upstream/clang/include/clang/Driver/Options.td l } // let Visibility = [FC1Option, FlangOption] def J : JoinedOrSeparate<["-"], "J">, -@@ -8266,7 +8271,7 @@ +@@ -8272,7 +8277,7 @@ // CUDA Options //===----------------------------------------------------------------------===// @@ -71,10 +71,10 @@ diff -Naur -x .git llvm-project.upstream/clang/include/clang/Driver/Options.td l def fcuda_is_device : Flag<["-"], "fcuda-is-device">, HelpText<"Generate code for CUDA device">, -diff -Naur -x .git llvm-project.upstream/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp ---- llvm-project.upstream/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp 2024-11-19 
12:55:58.343826221 -0500 -+++ llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp 2024-11-19 12:49:04.656150898 -0500 -@@ -745,14 +745,14 @@ +diff -Naur -x .git llvm-project-trunk/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp llvm-project-trunk-atd/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +--- llvm-project-trunk/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp 2024-11-23 20:25:46.067206057 -0600 ++++ llvm-project-trunk-atd/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp 2024-11-23 20:26:13.599107065 -0600 +@@ -744,14 +744,14 @@ void CGOpenMPRuntimeGPU::emitKernelInit(const OMPExecutableDirective &D, CodeGenFunction &CGF, EntryFunctionState &EST, bool IsSPMD) { @@ -95,7 +95,7 @@ diff -Naur -x .git llvm-project.upstream/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cp if (!IsSPMD) emitGenericVarsProlog(CGF, EST.Loc); } -@@ -1659,7 +1659,6 @@ +@@ -1658,7 +1658,6 @@ return; bool ParallelReduction = isOpenMPParallelDirective(Options.ReductionKind); @@ -103,7 +103,7 @@ diff -Naur -x .git llvm-project.upstream/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cp bool TeamsReduction = isOpenMPTeamsDirective(Options.ReductionKind); ASTContext &C = CGM.getContext(); -@@ -1756,7 +1755,7 @@ +@@ -1755,7 +1754,7 @@ llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createReductionsGPU( OmpLoc, AllocaIP, CodeGenIP, ReductionInfos, false, TeamsReduction, @@ -112,10 +112,10 @@ diff -Naur -x .git llvm-project.upstream/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cp CGF.getTarget().getGridValue(), C.getLangOpts().OpenMPCUDAReductionBufNum, RTLoc); assert(AfterIP && "unexpected error creating GPU reductions"); -diff -Naur -x .git llvm-project.upstream/clang/lib/Driver/Driver.cpp llvm-project/clang/lib/Driver/Driver.cpp ---- llvm-project.upstream/clang/lib/Driver/Driver.cpp 2024-11-19 12:55:58.353826188 -0500 -+++ llvm-project/clang/lib/Driver/Driver.cpp 2024-11-19 12:49:04.671150852 -0500 -@@ -2029,7 +2029,7 @@ +diff -Naur -x .git llvm-project-trunk/clang/lib/Driver/Driver.cpp llvm-project-trunk-atd/clang/lib/Driver/Driver.cpp +--- llvm-project-trunk/clang/lib/Driver/Driver.cpp 2024-11-23 20:30:46.622124367 -0600 ++++ llvm-project-trunk-atd/clang/lib/Driver/Driver.cpp 2024-11-23 20:26:13.615107008 -0600 +@@ -2028,7 +2028,7 @@ void Driver::PrintVersion(const Compilation &C, raw_ostream &OS) const { if (IsFlangMode()) { @@ -124,10 +124,10 @@ diff -Naur -x .git llvm-project.upstream/clang/lib/Driver/Driver.cpp llvm-projec } else { // FIXME: The following handlers should use a callback mechanism, we don't // know what the client would like to do. 
-diff -Naur -x .git llvm-project.upstream/clang/lib/Driver/ToolChain.cpp llvm-project/clang/lib/Driver/ToolChain.cpp ---- llvm-project.upstream/clang/lib/Driver/ToolChain.cpp 2024-11-19 12:55:58.354826185 -0500 -+++ llvm-project/clang/lib/Driver/ToolChain.cpp 2024-11-19 12:49:04.672150848 -0500 -@@ -416,9 +416,6 @@ +diff -Naur -x .git llvm-project-trunk/clang/lib/Driver/ToolChain.cpp llvm-project-trunk-atd/clang/lib/Driver/ToolChain.cpp +--- llvm-project-trunk/clang/lib/Driver/ToolChain.cpp 2024-11-23 20:25:46.079206014 -0600 ++++ llvm-project-trunk-atd/clang/lib/Driver/ToolChain.cpp 2024-11-23 20:26:13.615107008 -0600 +@@ -414,9 +414,6 @@ {"cl", "--driver-mode=cl"}, {"++", "--driver-mode=g++"}, {"flang", "--driver-mode=flang"}, @@ -137,10 +137,10 @@ diff -Naur -x .git llvm-project.upstream/clang/lib/Driver/ToolChain.cpp llvm-pro {"clang-dxc", "--driver-mode=dxc"}, }; -diff -Naur -x .git llvm-project.upstream/clang/lib/Driver/ToolChains/Clang.cpp llvm-project/clang/lib/Driver/ToolChains/Clang.cpp ---- llvm-project.upstream/clang/lib/Driver/ToolChains/Clang.cpp 2024-11-19 12:55:58.357826175 -0500 -+++ llvm-project/clang/lib/Driver/ToolChains/Clang.cpp 2024-11-19 12:49:04.675150839 -0500 -@@ -8897,7 +8897,9 @@ +diff -Naur -x .git llvm-project-trunk/clang/lib/Driver/ToolChains/Clang.cpp llvm-project-trunk-atd/clang/lib/Driver/ToolChains/Clang.cpp +--- llvm-project-trunk/clang/lib/Driver/ToolChains/Clang.cpp 2024-11-23 20:25:46.083206000 -0600 ++++ llvm-project-trunk-atd/clang/lib/Driver/ToolChains/Clang.cpp 2024-11-23 20:26:13.619106993 -0600 +@@ -8902,7 +8902,9 @@ assert(Input.isFilename() && "Invalid input."); CmdArgs.push_back(Input.getFilename()); @@ -151,9 +151,9 @@ diff -Naur -x .git llvm-project.upstream/clang/lib/Driver/ToolChains/Clang.cpp l if (D.CC1Main && !D.CCGenDiagnostics) { // Invoke cc1as directly in this process. 
C.addCommand(std::make_unique( -diff -Naur -x .git llvm-project.upstream/clang/lib/Driver/ToolChains/Flang.cpp llvm-project/clang/lib/Driver/ToolChains/Flang.cpp ---- llvm-project.upstream/clang/lib/Driver/ToolChains/Flang.cpp 2024-11-19 12:55:00.012016447 -0500 -+++ llvm-project/clang/lib/Driver/ToolChains/Flang.cpp 2024-11-19 12:49:04.676150836 -0500 +diff -Naur -x .git llvm-project-trunk/clang/lib/Driver/ToolChains/Flang.cpp llvm-project-trunk-atd/clang/lib/Driver/ToolChains/Flang.cpp +--- llvm-project-trunk/clang/lib/Driver/ToolChains/Flang.cpp 2024-11-23 20:30:46.626124353 -0600 ++++ llvm-project-trunk-atd/clang/lib/Driver/ToolChains/Flang.cpp 2024-11-14 15:29:46.794383844 -0600 @@ -120,7 +120,9 @@ options::OPT_fintrinsic_modules_path, options::OPT_pedantic, options::OPT_std_EQ, options::OPT_W_Joined, @@ -202,9 +202,9 @@ diff -Naur -x .git llvm-project.upstream/clang/lib/Driver/ToolChains/Flang.cpp l +Flang::Flang(const ToolChain &TC) : Tool("flang-new", "flang frontend", TC) {} Flang::~Flang() {} -diff -Naur -x .git llvm-project.upstream/clang/test/ClangScanDeps/multiple-commands.c llvm-project/clang/test/ClangScanDeps/multiple-commands.c ---- llvm-project.upstream/clang/test/ClangScanDeps/multiple-commands.c 2022-11-28 13:39:57.977654739 -0500 -+++ llvm-project/clang/test/ClangScanDeps/multiple-commands.c 2024-05-13 09:10:50.667094940 -0400 +diff -Naur -x .git llvm-project-trunk/clang/test/ClangScanDeps/multiple-commands.c llvm-project-trunk-atd/clang/test/ClangScanDeps/multiple-commands.c +--- llvm-project-trunk/clang/test/ClangScanDeps/multiple-commands.c 2024-08-27 20:04:03.984046081 -0500 ++++ llvm-project-trunk-atd/clang/test/ClangScanDeps/multiple-commands.c 2024-08-28 08:37:25.104601402 -0500 @@ -133,7 +133,7 @@ // CHECK-NEXT: "{{.*}}tu_save_temps_module.o" // CHECK: "{{.*}}tu_save_temps_module.s" @@ -214,9 +214,9 @@ diff -Naur -x .git llvm-project.upstream/clang/test/ClangScanDeps/multiple-comma // CHECK: "input-file": "[[PREFIX]]{{.}}tu_save_temps_module.c" // CHECK-NEXT: } // CHECK-NEXT: ] -diff -Naur -x .git llvm-project.upstream/clang/test/Driver/flang/flang.f90 llvm-project/clang/test/Driver/flang/flang.f90 ---- llvm-project.upstream/clang/test/Driver/flang/flang.f90 2024-11-19 12:55:00.400015185 -0500 -+++ llvm-project/clang/test/Driver/flang/flang.f90 2024-02-19 15:32:04.212924968 -0500 +diff -Naur -x .git llvm-project-trunk/clang/test/Driver/flang/flang.f90 llvm-project-trunk-atd/clang/test/Driver/flang/flang.f90 +--- llvm-project-trunk/clang/test/Driver/flang/flang.f90 2024-10-18 17:40:44.568868604 -0500 ++++ llvm-project-trunk-atd/clang/test/Driver/flang/flang.f90 2024-08-27 20:12:44.001839780 -0500 @@ -13,7 +13,7 @@ ! * (no type specified, resulting in an object file) @@ -226,9 +226,9 @@ diff -Naur -x .git llvm-project.upstream/clang/test/Driver/flang/flang.f90 llvm- ! Check that f90 files are not treated as "previously preprocessed" ! ... in --driver-mode=flang. 
-diff -Naur -x .git llvm-project.upstream/clang/test/Driver/flang/flang_ucase.F90 llvm-project/clang/test/Driver/flang/flang_ucase.F90 ---- llvm-project.upstream/clang/test/Driver/flang/flang_ucase.F90 2024-11-19 12:55:00.400015185 -0500 -+++ llvm-project/clang/test/Driver/flang/flang_ucase.F90 2024-02-19 15:32:04.212924968 -0500 +diff -Naur -x .git llvm-project-trunk/clang/test/Driver/flang/flang_ucase.F90 llvm-project-trunk-atd/clang/test/Driver/flang/flang_ucase.F90 +--- llvm-project-trunk/clang/test/Driver/flang/flang_ucase.F90 2024-10-18 17:40:44.568868604 -0500 ++++ llvm-project-trunk-atd/clang/test/Driver/flang/flang_ucase.F90 2024-08-27 20:12:44.001839780 -0500 @@ -13,7 +13,7 @@ ! * (no type specified, resulting in an object file) @@ -238,9 +238,9 @@ diff -Naur -x .git llvm-project.upstream/clang/test/Driver/flang/flang_ucase.F90 ! Check that f90 files are not treated as "previously preprocessed" ! ... in --driver-mode=flang. -diff -Naur -x .git llvm-project.upstream/clang/test/Driver/flang/multiple-inputs.f90 llvm-project/clang/test/Driver/flang/multiple-inputs.f90 ---- llvm-project.upstream/clang/test/Driver/flang/multiple-inputs.f90 2024-11-19 12:55:00.400015185 -0500 -+++ llvm-project/clang/test/Driver/flang/multiple-inputs.f90 2024-02-19 15:32:04.212924968 -0500 +diff -Naur -x .git llvm-project-trunk/clang/test/Driver/flang/multiple-inputs.f90 llvm-project-trunk-atd/clang/test/Driver/flang/multiple-inputs.f90 +--- llvm-project-trunk/clang/test/Driver/flang/multiple-inputs.f90 2024-10-18 17:40:44.568868604 -0500 ++++ llvm-project-trunk-atd/clang/test/Driver/flang/multiple-inputs.f90 2024-08-27 20:12:44.001839780 -0500 @@ -1,7 +1,7 @@ ! Check that flang driver can handle multiple inputs at once. @@ -251,9 +251,9 @@ diff -Naur -x .git llvm-project.upstream/clang/test/Driver/flang/multiple-inputs -! CHECK-SYNTAX-ONLY-LABEL: "{{[^"]*}}flang{{[^"/]*}}" "-fc1" +! CHECK-SYNTAX-ONLY-LABEL: "{{[^"]*}}flang-new{{[^"/]*}}" "-fc1" ! CHECK-SYNTAX-ONLY: "{{[^"]*}}/Inputs/two.f90" -diff -Naur -x .git llvm-project.upstream/clang/test/Driver/flang/multiple-inputs-mixed.f90 llvm-project/clang/test/Driver/flang/multiple-inputs-mixed.f90 ---- llvm-project.upstream/clang/test/Driver/flang/multiple-inputs-mixed.f90 2024-11-19 12:55:00.400015185 -0500 -+++ llvm-project/clang/test/Driver/flang/multiple-inputs-mixed.f90 2024-02-19 15:32:04.212924968 -0500 +diff -Naur -x .git llvm-project-trunk/clang/test/Driver/flang/multiple-inputs-mixed.f90 llvm-project-trunk-atd/clang/test/Driver/flang/multiple-inputs-mixed.f90 +--- llvm-project-trunk/clang/test/Driver/flang/multiple-inputs-mixed.f90 2024-10-18 17:40:44.568868604 -0500 ++++ llvm-project-trunk-atd/clang/test/Driver/flang/multiple-inputs-mixed.f90 2024-08-27 20:12:44.001839780 -0500 @@ -1,7 +1,7 @@ ! Check that flang can handle mixed C and fortran inputs. @@ -263,9 +263,9 @@ diff -Naur -x .git llvm-project.upstream/clang/test/Driver/flang/multiple-inputs ! CHECK-SYNTAX-ONLY: "{{[^"]*}}/Inputs/one.f90" ! CHECK-SYNTAX-ONLY-LABEL: "{{[^"]*}}clang{{[^"/]*}}" "-cc1" ! 
CHECK-SYNTAX-ONLY: "{{[^"]*}}/Inputs/other.c" -diff -Naur -x .git llvm-project.upstream/clang/test/Driver/hip-target-id.hip llvm-project/clang/test/Driver/hip-target-id.hip ---- llvm-project.upstream/clang/test/Driver/hip-target-id.hip 2024-07-09 19:05:25.426802216 -0400 -+++ llvm-project/clang/test/Driver/hip-target-id.hip 2024-06-17 09:19:11.129662101 -0400 +diff -Naur -x .git llvm-project-trunk/clang/test/Driver/hip-target-id.hip llvm-project-trunk-atd/clang/test/Driver/hip-target-id.hip +--- llvm-project-trunk/clang/test/Driver/hip-target-id.hip 2024-08-27 20:04:04.692043863 -0500 ++++ llvm-project-trunk-atd/clang/test/Driver/hip-target-id.hip 2024-08-28 08:37:25.104601402 -0500 @@ -26,7 +26,7 @@ // CHECK-SAME: "-target-feature" "+sramecc" // CHECK-SAME: "-target-feature" "+xnack" @@ -275,9 +275,9 @@ diff -Naur -x .git llvm-project.upstream/clang/test/Driver/hip-target-id.hip llv // TMP-SAME: "-target-cpu" "gfx908" // TMP-SAME: "-target-feature" "+sramecc" // TMP-SAME: "-target-feature" "+xnack" -diff -Naur -x .git llvm-project.upstream/clang/test/OpenMP/irbuilder_nested_parallel_for.c llvm-project/clang/test/OpenMP/irbuilder_nested_parallel_for.c ---- llvm-project.upstream/clang/test/OpenMP/irbuilder_nested_parallel_for.c 2024-08-19 13:39:47.298756651 -0400 -+++ llvm-project/clang/test/OpenMP/irbuilder_nested_parallel_for.c 2024-08-20 14:56:30.867033548 -0400 +diff -Naur -x .git llvm-project-trunk/clang/test/OpenMP/irbuilder_nested_parallel_for.c llvm-project-trunk-atd/clang/test/OpenMP/irbuilder_nested_parallel_for.c +--- llvm-project-trunk/clang/test/OpenMP/irbuilder_nested_parallel_for.c 2024-08-27 20:04:04.880043275 -0500 ++++ llvm-project-trunk-atd/clang/test/OpenMP/irbuilder_nested_parallel_for.c 2024-08-28 08:37:25.104601402 -0500 @@ -120,14 +120,14 @@ // CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0) // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4 @@ -3706,9 +3706,9 @@ diff -Naur -x .git llvm-project.upstream/clang/test/OpenMP/irbuilder_nested_para +// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META436]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG439]] // -diff -Naur -x .git llvm-project.upstream/flang/docs/DoConcurrentConversionToOpenMP.md llvm-project/flang/docs/DoConcurrentConversionToOpenMP.md ---- llvm-project.upstream/flang/docs/DoConcurrentConversionToOpenMP.md 1969-12-31 19:00:00.000000000 -0500 -+++ llvm-project/flang/docs/DoConcurrentConversionToOpenMP.md 2024-08-12 11:55:29.868279345 -0400 +diff -Naur -x .git llvm-project-trunk/flang/docs/DoConcurrentConversionToOpenMP.md llvm-project-trunk-atd/flang/docs/DoConcurrentConversionToOpenMP.md +--- llvm-project-trunk/flang/docs/DoConcurrentConversionToOpenMP.md 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-trunk-atd/flang/docs/DoConcurrentConversionToOpenMP.md 2024-08-28 08:37:25.104601402 -0500 @@ -0,0 +1,332 @@ +