diff --git a/bin/patches/ATD_ASO_full.patch b/bin/patches/ATD_ASO_full.patch index 7880dd34b..02e51a57c 100644 --- a/bin/patches/ATD_ASO_full.patch +++ b/bin/patches/ATD_ASO_full.patch @@ -1,10 +1,18 @@ -diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/include/clang/Driver/Options.td llvm-project/clang/include/clang/Driver/Options.td ---- llvm-project.orig/clang/include/clang/Driver/Options.td 2024-06-12 10:43:11.776219369 -0500 -+++ llvm-project/clang/include/clang/Driver/Options.td 2024-06-12 10:44:09.343614323 -0500 -@@ -6757,6 +6757,10 @@ - defm loop_versioning : BoolOptionWithoutMarshalling<"f", "version-loops-for-stride", - PosFlag, - NegFlag>; +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/clang/include/clang/Driver/Options.td llvm-project-aso/clang/include/clang/Driver/Options.td +--- llvm-project-aso-orig/clang/include/clang/Driver/Options.td 2024-11-23 20:25:26.659275825 -0600 ++++ llvm-project-aso/clang/include/clang/Driver/Options.td 2024-11-23 20:39:47.168175409 -0600 +@@ -7027,6 +7027,7 @@ + defm logical_abbreviations : OptInFC1FFlag<"logical-abbreviations", "Enable logical abbreviations">; + defm implicit_none : OptInFC1FFlag<"implicit-none", "No implicit typing allowed unless overridden by IMPLICIT statements">; + defm underscoring : OptInFC1FFlag<"underscoring", "Appends one trailing underscore to external names">; ++defm offload_global_filtering : OptInFC1FFlag<"offload-global-filtering", "Enable/disable OpenMP global filtering pass">; + defm ppc_native_vec_elem_order: BoolOptionWithoutMarshalling<"f", "ppc-native-vector-element-order", + PosFlag, + NegFlag>; +@@ -7043,6 +7044,10 @@ + + def fhermetic_module_files : Flag<["-"], "fhermetic-module-files">, Group, + HelpText<"Emit hermetic module files (no nested USE association)">; + +def do_concurrent_parallel_EQ : Joined<["-"], "fdo-concurrent-parallel=">, + HelpText<"Try to map `do concurrent` loops to OpenMP (on host or device)">, @@ -12,10 +20,19 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/include/clang/Driver/Optio } // let Visibility = [FC1Option, FlangOption] def J : JoinedOrSeparate<["-"], "J">, -diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp ---- llvm-project.orig/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp 2024-06-12 10:43:11.868218401 -0500 -+++ llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp 2024-06-12 10:44:09.343614323 -0500 -@@ -887,14 +887,13 @@ +@@ -8421,7 +8426,7 @@ + // CUDA Options + //===----------------------------------------------------------------------===// + +-let Visibility = [CC1Option] in { ++let Visibility = [CC1Option, FC1Option] in { + + def fcuda_is_device : Flag<["-"], "fcuda-is-device">, + HelpText<"Generate code for CUDA device">, +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp llvm-project-aso/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +--- llvm-project-aso-orig/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp 2024-11-23 20:25:26.707275652 -0600 ++++ llvm-project-aso/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp 2024-11-23 20:39:47.168175409 -0600 +@@ -862,14 +862,14 @@ void CGOpenMPRuntimeGPU::emitKernelInit(const OMPExecutableDirective &D, CodeGenFunction &CGF, EntryFunctionState &EST, bool IsSPMD) { @@ -25,8 +42,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/lib/CodeGen/CGOpenMPRuntim - MinTeamsVal, MaxTeamsVal); + // Get NumTeams and ThreadLimit attributes. 
+ llvm::OpenMPIRBuilder::TargetKernelDefaultBounds Bounds; -+ computeMinAndMaxThreadsAndTeams(D, CGF, Bounds.MinThreads, Bounds.MaxThreads, -+ Bounds.MinTeams, Bounds.MaxTeams); ++ computeMinAndMaxThreadsAndTeams( ++ D, CGF, Bounds.MinThreads, Bounds.MaxThreads.emplace_back(-1), ++ Bounds.MinTeams, Bounds.MaxTeams.emplace_back(-1)); CGBuilderTy &Bld = CGF.Builder; - Bld.restoreIP(OMPBuilder.createTargetInit( @@ -35,10 +53,27 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/lib/CodeGen/CGOpenMPRuntim if (!IsSPMD) emitGenericVarsProlog(CGF, EST.Loc); } -diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/lib/Driver/ToolChains/Clang.cpp llvm-project/clang/lib/Driver/ToolChains/Clang.cpp ---- llvm-project.orig/clang/lib/Driver/ToolChains/Clang.cpp 2024-06-12 10:43:11.904218022 -0500 -+++ llvm-project/clang/lib/Driver/ToolChains/Clang.cpp 2024-06-12 10:44:09.343614323 -0500 -@@ -8862,7 +8862,9 @@ +@@ -1889,7 +1889,6 @@ + return; + + bool ParallelReduction = isOpenMPParallelDirective(Options.ReductionKind); +- bool DistributeReduction = isOpenMPDistributeDirective(Options.ReductionKind); + bool TeamsReduction = isOpenMPTeamsDirective(Options.ReductionKind); + + ASTContext &C = CGM.getContext(); +@@ -1986,7 +1985,7 @@ + llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + OMPBuilder.createReductionsGPU( + OmpLoc, AllocaIP, CodeGenIP, ReductionInfos, false, TeamsReduction, +- DistributeReduction, llvm::OpenMPIRBuilder::ReductionGenCBKind::Clang, ++ llvm::OpenMPIRBuilder::ReductionGenCBKind::Clang, + CGF.getTarget().getGridValue(), + C.getLangOpts().OpenMPCUDAReductionBufNum, RTLoc); + assert(AfterIP && "unexpected error creating GPU reductions"); +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/clang/lib/Driver/ToolChains/Clang.cpp llvm-project-aso/clang/lib/Driver/ToolChains/Clang.cpp +--- llvm-project-aso-orig/clang/lib/Driver/ToolChains/Clang.cpp 2024-11-23 20:25:26.719275609 -0600 ++++ llvm-project-aso/clang/lib/Driver/ToolChains/Clang.cpp 2024-11-23 20:39:47.172175395 -0600 +@@ -9077,7 +9077,9 @@ assert(Input.isFilename() && "Invalid input."); CmdArgs.push_back(Input.getFilename()); @@ -49,10 +84,21 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/lib/Driver/ToolChains/Clan if (D.CC1Main && !D.CCGenDiagnostics) { // Invoke cc1as directly in this process. 
C.addCommand(std::make_unique( -diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/lib/Driver/ToolChains/Flang.cpp llvm-project/clang/lib/Driver/ToolChains/Flang.cpp ---- llvm-project.orig/clang/lib/Driver/ToolChains/Flang.cpp 2024-06-12 10:43:11.908217981 -0500 -+++ llvm-project/clang/lib/Driver/ToolChains/Flang.cpp 2024-06-12 10:44:09.343614323 -0500 -@@ -141,7 +141,8 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/clang/lib/Driver/ToolChains/Flang.cpp llvm-project-aso/clang/lib/Driver/ToolChains/Flang.cpp +--- llvm-project-aso-orig/clang/lib/Driver/ToolChains/Flang.cpp 2024-11-23 20:25:26.723275595 -0600 ++++ llvm-project-aso/clang/lib/Driver/ToolChains/Flang.cpp 2024-11-23 20:40:24.480040785 -0600 +@@ -120,7 +120,9 @@ + options::OPT_fintrinsic_modules_path, options::OPT_pedantic, + options::OPT_std_EQ, options::OPT_W_Joined, + options::OPT_fconvert_EQ, options::OPT_fpass_plugin_EQ, +- options::OPT_funderscoring, options::OPT_fno_underscoring}); ++ options::OPT_funderscoring, options::OPT_fno_underscoring, ++ options::OPT_foffload_global_filtering, ++ options::OPT_fno_offload_global_filtering}); + + llvm::codegenoptions::DebugInfoKind DebugInfoKind; + if (Args.hasArg(options::OPT_gN_Group)) { +@@ -150,7 +152,8 @@ options::OPT_flang_deprecated_no_hlfir, options::OPT_flang_experimental_integer_overflow, options::OPT_fno_ppc_native_vec_elem_order, @@ -62,9 +108,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/lib/Driver/ToolChains/Flan } void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const { -diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/ClangScanDeps/multiple-commands.c llvm-project/clang/test/ClangScanDeps/multiple-commands.c ---- llvm-project.orig/clang/test/ClangScanDeps/multiple-commands.c 2023-08-31 11:50:49.134212787 -0500 -+++ llvm-project/clang/test/ClangScanDeps/multiple-commands.c 2024-06-12 10:44:09.343614323 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/clang/test/ClangScanDeps/multiple-commands.c llvm-project-aso/clang/test/ClangScanDeps/multiple-commands.c +--- llvm-project-aso-orig/clang/test/ClangScanDeps/multiple-commands.c 2024-10-18 14:42:28.502433945 -0500 ++++ llvm-project-aso/clang/test/ClangScanDeps/multiple-commands.c 2024-11-23 20:39:47.172175395 -0600 @@ -134,7 +134,7 @@ // CHECK-NEXT: "{{.*}}tu_save_temps_module.o" // CHECK: "{{.*}}tu_save_temps_module.s" @@ -74,9 +120,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/ClangScanDeps/multipl // CHECK: "input-file": "[[PREFIX]]{{.}}tu_save_temps_module.c" // CHECK-NEXT: } // CHECK-NEXT: ] -diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/Driver/hip-target-id.hip llvm-project/clang/test/Driver/hip-target-id.hip ---- llvm-project.orig/clang/test/Driver/hip-target-id.hip 2024-06-12 10:43:12.324213607 -0500 -+++ llvm-project/clang/test/Driver/hip-target-id.hip 2024-06-12 10:44:09.347614281 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/clang/test/Driver/hip-target-id.hip llvm-project-aso/clang/test/Driver/hip-target-id.hip +--- llvm-project-aso-orig/clang/test/Driver/hip-target-id.hip 2024-08-27 20:36:24.320182191 -0500 ++++ llvm-project-aso/clang/test/Driver/hip-target-id.hip 2024-11-23 20:39:47.172175395 -0600 @@ -26,7 +26,7 @@ // CHECK-SAME: "-target-feature" "+sramecc" // CHECK-SAME: "-target-feature" "+xnack" @@ -86,9 +132,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/Driver/hip-target-id. 
// TMP-SAME: "-target-cpu" "gfx908" // TMP-SAME: "-target-feature" "+sramecc" // TMP-SAME: "-target-feature" "+xnack" -diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nested_parallel_for.c llvm-project/clang/test/OpenMP/irbuilder_nested_parallel_for.c ---- llvm-project.orig/clang/test/OpenMP/irbuilder_nested_parallel_for.c 2023-08-31 11:50:49.858235198 -0500 -+++ llvm-project/clang/test/OpenMP/irbuilder_nested_parallel_for.c 2024-06-12 10:44:09.347614281 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/clang/test/OpenMP/irbuilder_nested_parallel_for.c llvm-project-aso/clang/test/OpenMP/irbuilder_nested_parallel_for.c +--- llvm-project-aso-orig/clang/test/OpenMP/irbuilder_nested_parallel_for.c 2024-08-27 20:36:24.516180232 -0500 ++++ llvm-project-aso/clang/test/OpenMP/irbuilder_nested_parallel_for.c 2024-11-23 20:39:47.172175395 -0600 @@ -120,14 +120,14 @@ // CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0) // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4 @@ -194,14 +240,14 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK: omp.par.exit.split: -// CHECK-NEXT: store i32 0, ptr [[I185]], align 4 --// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[AGG_CAPTURED186]], i32 0, i32 0 +-// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_17]], ptr [[AGG_CAPTURED186]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[I185]], ptr [[TMP0]], align 8 --// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[AGG_CAPTURED187]], i32 0, i32 0 +-// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_18]], ptr [[AGG_CAPTURED187]], i32 0, i32 0 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I185]], align 4 +// CHECK-NEXT: store i32 0, ptr [[I191]], align 4 -+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[AGG_CAPTURED192]], i32 0, i32 0 ++// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_17]], ptr [[AGG_CAPTURED192]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I191]], ptr [[TMP0]], align 8 -+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[AGG_CAPTURED193]], i32 0, i32 0 ++// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_18]], ptr [[AGG_CAPTURED193]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I191]], align 4 // CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 -// CHECK-NEXT: call void @__captured_stmt.19(ptr [[DOTCOUNT_ADDR188]], ptr [[AGG_CAPTURED186]]) @@ -289,35 +335,35 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-NEXT: ret void // // -@@ -507,7 +507,7 @@ +@@ -507,11 +507,11 @@ // CHECK-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 // CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 -// CHECK-NEXT: [[STRUCTARG214:%.*]] = alloca { ptr, ptr, ptr }, align 8 +-// CHECK-NEXT: [[P_LASTITER178:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_LOWERBOUND179:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_UPPERBOUND180:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_STRIDE181:%.*]] = alloca i32, align 4 +// CHECK-NEXT: 
[[STRUCTARG221:%.*]] = alloca { ptr, ptr, ptr }, align 8 - // CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 - // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 - // CHECK-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 -@@ -520,14 +520,14 @@ ++// CHECK-NEXT: [[P_LASTITER183:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_LOWERBOUND184:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_UPPERBOUND185:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_STRIDE186:%.*]] = alloca i32, align 4 + // CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 - // CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 +@@ -524,10 +524,10 @@ + // CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 + // CHECK-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 + // CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[I160:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[AGG_CAPTURED161:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 -// CHECK-NEXT: [[AGG_CAPTURED162:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 -// CHECK-NEXT: [[DOTCOUNT_ADDR163:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_LASTITER178:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_LOWERBOUND179:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_UPPERBOUND180:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_STRIDE181:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[I165:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED166:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED167:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR168:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_LASTITER183:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_LOWERBOUND184:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_UPPERBOUND185:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_STRIDE186:%.*]] = alloca i32, align 4 // CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK: omp.par.region: // CHECK-NEXT: store i32 0, ptr [[I]], align 4 @@ -364,9 +410,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-NEXT: br label [[OMP_PAR_EXIT11_SPLIT:%.*]] // CHECK: omp.par.exit11.split: -// CHECK-NEXT: store i32 0, ptr [[I160]], align 4 --// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[AGG_CAPTURED161]], i32 0, i32 0 +-// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_15]], ptr [[AGG_CAPTURED161]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[I160]], ptr [[TMP10]], align 8 --// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[AGG_CAPTURED162]], i32 0, i32 0 +-// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_16]], ptr [[AGG_CAPTURED162]], i32 0, i32 0 -// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[I160]], align 4 -// CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4 -// CHECK-NEXT: call void @__captured_stmt.17(ptr [[DOTCOUNT_ADDR163]], ptr [[AGG_CAPTURED161]]) @@ -397,9 +443,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest -// CHECK-NEXT: br label [[OMP_LOOP_AFTER171:%.*]] -// CHECK: omp_loop.after171: +// CHECK-NEXT: store i32 0, ptr [[I165]], align 4 -+// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[AGG_CAPTURED166]], i32 0, i32 0 ++// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw 
[[STRUCT_ANON_15]], ptr [[AGG_CAPTURED166]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I165]], ptr [[TMP9]], align 8 -+// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[AGG_CAPTURED167]], i32 0, i32 0 ++// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_16]], ptr [[AGG_CAPTURED167]], i32 0, i32 0 +// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[I165]], align 4 +// CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-NEXT: call void @__captured_stmt.17(ptr [[DOTCOUNT_ADDR168]], ptr [[AGG_CAPTURED166]]) @@ -490,51 +536,52 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-NEXT: ret void // // -@@ -656,7 +656,7 @@ +@@ -656,16 +656,16 @@ // CHECK-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 // CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 -// CHECK-NEXT: [[STRUCTARG209:%.*]] = alloca { ptr, ptr, ptr }, align 8 +// CHECK-NEXT: [[STRUCTARG216:%.*]] = alloca { ptr, ptr, ptr }, align 8 // CHECK-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8 - // CHECK-NEXT: [[TID_ADDR_LOCAL12:%.*]] = alloca i32, align 4 - // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR6]], align 4 -@@ -670,22 +670,22 @@ +-// CHECK-NEXT: [[P_LASTITER153:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_LOWERBOUND154:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_UPPERBOUND155:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_STRIDE156:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_LASTITER93:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_LOWERBOUND94:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_UPPERBOUND95:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_STRIDE96:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_LASTITER157:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_LOWERBOUND158:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_UPPERBOUND159:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_STRIDE160:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_LASTITER95:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_LOWERBOUND96:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_UPPERBOUND97:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_STRIDE98:%.*]] = alloca i32, align 4 + // CHECK-NEXT: [[P_LASTITER34:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[P_LOWERBOUND35:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[P_UPPERBOUND36:%.*]] = alloca i32, align 4 - // CHECK-NEXT: [[P_STRIDE37:%.*]] = alloca i32, align 4 +@@ -678,14 +678,14 @@ + // CHECK-NEXT: [[AGG_CAPTURED17:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 + // CHECK-NEXT: [[AGG_CAPTURED18:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 + // CHECK-NEXT: [[DOTCOUNT_ADDR19:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[I75:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[AGG_CAPTURED76:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 -// CHECK-NEXT: [[AGG_CAPTURED77:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 -// CHECK-NEXT: [[DOTCOUNT_ADDR78:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_LASTITER93:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_LOWERBOUND94:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_UPPERBOUND95:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_STRIDE96:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[I135:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[AGG_CAPTURED136:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 -// CHECK-NEXT: [[AGG_CAPTURED137:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 
-// CHECK-NEXT: [[DOTCOUNT_ADDR138:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_LASTITER153:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_LOWERBOUND154:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_UPPERBOUND155:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_STRIDE156:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[I77:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED78:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED79:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR80:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_LASTITER95:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_LOWERBOUND96:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_UPPERBOUND97:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_STRIDE98:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[I139:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED140:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED141:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR142:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_LASTITER157:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_LOWERBOUND158:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_UPPERBOUND159:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_STRIDE160:%.*]] = alloca i32, align 4 // CHECK-NEXT: br label [[OMP_PAR_REGION9:%.*]] // CHECK: omp.par.region9: // CHECK-NEXT: store i32 0, ptr [[I16]], align 4 @@ -574,9 +621,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest -// CHECK-NEXT: br label [[OMP_PAR_EXIT46_SPLIT:%.*]] -// CHECK: omp.par.exit46.split: -// CHECK-NEXT: store i32 0, ptr [[I75]], align 4 --// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[AGG_CAPTURED76]], i32 0, i32 0 +-// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_9]], ptr [[AGG_CAPTURED76]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[I75]], ptr [[TMP10]], align 8 --// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[AGG_CAPTURED77]], i32 0, i32 0 +-// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_10]], ptr [[AGG_CAPTURED77]], i32 0, i32 0 -// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[I75]], align 4 -// CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4 -// CHECK-NEXT: call void @__captured_stmt.11(ptr [[DOTCOUNT_ADDR78]], ptr [[AGG_CAPTURED76]]) @@ -609,9 +656,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest +// CHECK-NEXT: br label [[OMP_PAR_EXIT47_SPLIT:%.*]] +// CHECK: omp.par.exit47.split: +// CHECK-NEXT: store i32 0, ptr [[I77]], align 4 -+// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[AGG_CAPTURED78]], i32 0, i32 0 ++// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_9]], ptr [[AGG_CAPTURED78]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I77]], ptr [[TMP9]], align 8 -+// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[AGG_CAPTURED79]], i32 0, i32 0 ++// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_10]], ptr [[AGG_CAPTURED79]], i32 0, i32 0 +// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[I77]], align 4 +// CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-NEXT: call void @__captured_stmt.11(ptr [[DOTCOUNT_ADDR80]], ptr [[AGG_CAPTURED78]]) @@ -637,9 +684,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest -// CHECK-NEXT: br label 
[[OMP_PAR_EXIT105_SPLIT:%.*]] -// CHECK: omp.par.exit105.split: -// CHECK-NEXT: store i32 0, ptr [[I135]], align 4 --// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[AGG_CAPTURED136]], i32 0, i32 0 +-// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_13]], ptr [[AGG_CAPTURED136]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[I135]], ptr [[TMP18]], align 8 --// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[AGG_CAPTURED137]], i32 0, i32 0 +-// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_14]], ptr [[AGG_CAPTURED137]], i32 0, i32 0 -// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[I135]], align 4 -// CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4 -// CHECK-NEXT: call void @__captured_stmt.15(ptr [[DOTCOUNT_ADDR138]], ptr [[AGG_CAPTURED136]]) @@ -702,9 +749,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest +// CHECK-NEXT: br label [[OMP_PAR_EXIT108_SPLIT:%.*]] +// CHECK: omp.par.exit108.split: +// CHECK-NEXT: store i32 0, ptr [[I139]], align 4 -+// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[AGG_CAPTURED140]], i32 0, i32 0 ++// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_13]], ptr [[AGG_CAPTURED140]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I139]], ptr [[TMP16]], align 8 -+// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[AGG_CAPTURED141]], i32 0, i32 0 ++// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_14]], ptr [[AGG_CAPTURED141]], i32 0, i32 0 +// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[I139]], align 4 +// CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4 +// CHECK-NEXT: call void @__captured_stmt.15(ptr [[DOTCOUNT_ADDR142]], ptr [[AGG_CAPTURED140]]) @@ -834,6 +881,10 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 // CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 +-// CHECK-NEXT: [[P_LASTITER128:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_LOWERBOUND129:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_UPPERBOUND130:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_STRIDE131:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[TID_ADDR_LOCAL106:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR100]], align 4 -// CHECK-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL106]], align 4 @@ -842,17 +893,17 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest -// CHECK-NEXT: [[AGG_CAPTURED111:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 -// CHECK-NEXT: [[AGG_CAPTURED112:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 -// CHECK-NEXT: [[DOTCOUNT_ADDR113:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_LASTITER128:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_LOWERBOUND129:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_UPPERBOUND130:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_STRIDE131:%.*]] = alloca i32, align 4 -// CHECK-NEXT: br label [[OMP_PAR_REGION103:%.*]] -// CHECK: omp.par.region103: -// CHECK-NEXT: store i32 0, ptr [[I110]], align 4 --// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[AGG_CAPTURED111]], i32 0, i32 0 +-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_11]], ptr 
[[AGG_CAPTURED111]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[I110]], ptr [[TMP2]], align 8 --// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[AGG_CAPTURED112]], i32 0, i32 0 +-// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_12]], ptr [[AGG_CAPTURED112]], i32 0, i32 0 -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I110]], align 4 ++// CHECK-NEXT: [[P_LASTITER131:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_LOWERBOUND132:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_UPPERBOUND133:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_STRIDE134:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TID_ADDR_LOCAL109:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR103]], align 4 +// CHECK-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL109]], align 4 @@ -861,16 +912,12 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest +// CHECK-NEXT: [[AGG_CAPTURED114:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED115:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR116:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_LASTITER131:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_LOWERBOUND132:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_UPPERBOUND133:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_STRIDE134:%.*]] = alloca i32, align 4 +// CHECK-NEXT: br label [[OMP_PAR_REGION106:%.*]] +// CHECK: omp.par.region106: +// CHECK-NEXT: store i32 0, ptr [[I113]], align 4 -+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[AGG_CAPTURED114]], i32 0, i32 0 ++// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_11]], ptr [[AGG_CAPTURED114]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I113]], ptr [[TMP2]], align 8 -+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[AGG_CAPTURED115]], i32 0, i32 0 ++// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_12]], ptr [[AGG_CAPTURED115]], i32 0, i32 0 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I113]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 -// CHECK-NEXT: call void @__captured_stmt.13(ptr [[DOTCOUNT_ADDR113]], ptr [[AGG_CAPTURED111]]) @@ -981,6 +1028,10 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 // CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 +-// CHECK-NEXT: [[P_LASTITER69:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_LOWERBOUND70:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_UPPERBOUND71:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: [[P_STRIDE72:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[TID_ADDR_LOCAL47:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR41]], align 4 -// CHECK-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL47]], align 4 @@ -989,17 +1040,17 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest -// CHECK-NEXT: [[AGG_CAPTURED52:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 -// CHECK-NEXT: [[AGG_CAPTURED53:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 -// CHECK-NEXT: [[DOTCOUNT_ADDR54:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_LASTITER69:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_LOWERBOUND70:%.*]] = alloca i32, align 4 --// CHECK-NEXT: 
[[P_UPPERBOUND71:%.*]] = alloca i32, align 4 --// CHECK-NEXT: [[P_STRIDE72:%.*]] = alloca i32, align 4 -// CHECK-NEXT: br label [[OMP_PAR_REGION44:%.*]] -// CHECK: omp.par.region44: -// CHECK-NEXT: store i32 0, ptr [[I51]], align 4 --// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[AGG_CAPTURED52]], i32 0, i32 0 +-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_7]], ptr [[AGG_CAPTURED52]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[I51]], ptr [[TMP2]], align 8 --// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED53]], i32 0, i32 0 +-// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED53]], i32 0, i32 0 -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I51]], align 4 ++// CHECK-NEXT: [[P_LASTITER70:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_LOWERBOUND71:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_UPPERBOUND72:%.*]] = alloca i32, align 4 ++// CHECK-NEXT: [[P_STRIDE73:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TID_ADDR_LOCAL48:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR42]], align 4 +// CHECK-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL48]], align 4 @@ -1008,16 +1059,12 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest +// CHECK-NEXT: [[AGG_CAPTURED53:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED54:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR55:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_LASTITER70:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_LOWERBOUND71:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_UPPERBOUND72:%.*]] = alloca i32, align 4 -+// CHECK-NEXT: [[P_STRIDE73:%.*]] = alloca i32, align 4 +// CHECK-NEXT: br label [[OMP_PAR_REGION45:%.*]] +// CHECK: omp.par.region45: +// CHECK-NEXT: store i32 0, ptr [[I52]], align 4 -+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[AGG_CAPTURED53]], i32 0, i32 0 ++// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_7]], ptr [[AGG_CAPTURED53]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I52]], ptr [[TMP2]], align 8 -+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED54]], i32 0, i32 0 ++// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED54]], i32 0, i32 0 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I52]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 -// CHECK-NEXT: call void @__captured_stmt.9(ptr [[DOTCOUNT_ADDR54]], ptr [[AGG_CAPTURED52]]) @@ -1146,125 +1193,125 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META40:![0-9]+]], metadata !DIExpression()), !dbg [[DBG41:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META39:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META39:![0-9]+]], !DIExpression(), [[META40:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META40:![0-9]+]], !DIExpression(), [[META41:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META42:![0-9]+]], metadata !DIExpression()), !dbg [[DBG41]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META41:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META41:![0-9]+]], !DIExpression(), [[META40]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META42:![0-9]+]], !DIExpression(), [[META41]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META43:![0-9]+]], metadata !DIExpression()), !dbg [[DBG45:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG46:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG46]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG46]] --// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG45]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META48:![0-9]+]], metadata !DIExpression()), !dbg [[DBG49:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META50:![0-9]+]], metadata !DIExpression()), !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG49]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META42:![0-9]+]], metadata !DIExpression()), !dbg [[DBG44:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG45:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG45]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG45]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG44]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META47:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META49:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG48]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META42:![0-9]+]], !DIExpression(), [[META44:![0-9]+]]) +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG45:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG45]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG45]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META44]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META47:![0-9]+]], !DIExpression(), [[META48:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META48]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META49:![0-9]+]], !DIExpression(), [[META48]]) +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META48]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META48]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META48]] +-// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META48]] +-// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META48]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META43:![0-9]+]], !DIExpression(), [[META45:![0-9]+]]) ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG46:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG46]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG46]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META45]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META48:![0-9]+]], !DIExpression(), [[META49:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META49]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META50:![0-9]+]], !DIExpression(), [[META49]]) ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META49]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META49]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META49]] ++// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META49]] ++// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META49]] // CHECK-DEBUG: cond.true: --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG49]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG48]] -+// 
CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG48]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META48]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META48]] +-// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META48]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META48]] +-// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META48]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META48]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META48]] +-// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META48]] +-// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META48]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META49]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META49]] ++// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META49]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META49]] ++// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META49]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META49]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META49]] ++// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META49]] ++// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META49]] // CHECK-DEBUG: cond.false: --// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG49]] -+// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG48]] +-// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META48]] ++// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META49]] // CHECK-DEBUG: cond.end: --// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG49]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG51:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG48]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG50:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META48]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META48]] +-// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META48]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG50:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META49]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META49]] ++// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META49]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG51:![0-9]+]] // // // CHECK-DEBUG-LABEL: define 
{{[^@]+}}@__captured_stmt.1 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG53:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG52:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG52:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG53:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META61:![0-9]+]], metadata !DIExpression()), !dbg [[DBG62:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META60:![0-9]+]], metadata !DIExpression()), !dbg [[DBG61:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META60:![0-9]+]], !DIExpression(), [[META61:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META61:![0-9]+]], !DIExpression(), [[META62:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META63:![0-9]+]], metadata !DIExpression()), !dbg [[DBG62]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META62:![0-9]+]], metadata !DIExpression()), !dbg [[DBG61]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META62:![0-9]+]], !DIExpression(), [[META61]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META63:![0-9]+]], !DIExpression(), [[META62]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META64:![0-9]+]], metadata !DIExpression()), !dbg [[DBG62]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META63:![0-9]+]], metadata !DIExpression()), !dbg [[DBG61]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META63:![0-9]+]], !DIExpression(), [[META61]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META64:![0-9]+]], !DIExpression(), [[META62]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG65:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG65]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG67:![0-9]+]] --// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG67]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG67]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG67]] --// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 
4, !dbg [[DBG62]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG65]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG64:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG64]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG66:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG66]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG66]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG66]] -+// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG61]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG64]] +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG64:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG64]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG66:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG66]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG66]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG66]] +-// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META61]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG64]] ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG65:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG65]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG67:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG67]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG67]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG67]] ++// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META62]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG65]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_1Pfid --// CHECK-DEBUG-SAME: (ptr noundef [[R:%.*]], i32 noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] !dbg [[DBG70:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef [[R:%.*]], i32 noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] !dbg [[DBG69:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef [[R:%.*]], i32 noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] !dbg [[DBG69:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef [[R:%.*]], i32 noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] !dbg [[DBG70:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[STRUCTARG17:%.*]] = alloca { ptr, ptr, ptr }, align 8 // CHECK-DEBUG-NEXT: [[R_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 // CHECK-DEBUG-NEXT: store ptr [[R]], ptr [[R_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[R_ADDR]], metadata [[META76:![0-9]+]], metadata !DIExpression()), !dbg [[DBG77:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[R_ADDR]], metadata [[META75:![0-9]+]], metadata !DIExpression()), !dbg [[DBG76:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[R_ADDR]], [[META75:![0-9]+]], !DIExpression(), [[META76:![0-9]+]]) ++// CHECK-DEBUG-NEXT: 
#dbg_declare(ptr [[R_ADDR]], [[META76:![0-9]+]], !DIExpression(), [[META77:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META78:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META77:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META77:![0-9]+]], !DIExpression(), [[META78:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META78:![0-9]+]], !DIExpression(), [[META79:![0-9]+]]) // CHECK-DEBUG-NEXT: store double [[B]], ptr [[B_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META80:![0-9]+]], metadata !DIExpression()), !dbg [[DBG81:![0-9]+]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]]), !dbg [[DBG82:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META79:![0-9]+]], metadata !DIExpression()), !dbg [[DBG80:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]]), !dbg [[DBG81:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META79:![0-9]+]], !DIExpression(), [[META80:![0-9]+]]) +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]]), !dbg [[DBG81:![0-9]+]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META80:![0-9]+]], !DIExpression(), [[META81:![0-9]+]]) ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]]), !dbg [[DBG82:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR18:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG17]], i32 0, i32 0 @@ -1272,19 +1319,19 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: store ptr [[B_ADDR]], ptr [[GEP_B_ADDR19]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR20:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG17]], i32 0, i32 2 // CHECK-DEBUG-NEXT: store ptr [[R_ADDR]], ptr [[GEP_R_ADDR20]], align 8 --// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB6]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par.4, ptr [[STRUCTARG17]]), !dbg [[DBG83:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB6]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par.4, ptr [[STRUCTARG17]]), !dbg [[DBG82:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB6]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par.4, ptr [[STRUCTARG17]]), !dbg [[DBG82:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB6]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par.4, ptr [[STRUCTARG17]]), !dbg [[DBG83:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16:%.*]] // CHECK-DEBUG: omp.par.outlined.exit16: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK-DEBUG: omp.par.exit.split: --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG85:![0-9]+]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG84:![0-9]+]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG84:![0-9]+]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG85:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_1Pfid..omp_par.4 --// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG86:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG85:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG85:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG86:![0-9]+]] { // CHECK-DEBUG-NEXT: omp.par.entry: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 // CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 @@ -1292,8 +1339,8 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK-DEBUG: omp.par.region: --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB8:[0-9]+]]), !dbg [[DBG87:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB8:[0-9]+]]), !dbg [[DBG86:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB8:[0-9]+]]), !dbg [[DBG86:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB8:[0-9]+]]), !dbg [[DBG87:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR1:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 @@ -1301,136 +1348,136 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: store ptr [[LOADGEP_B_ADDR]], ptr [[GEP_B_ADDR2]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR3:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2 // CHECK-DEBUG-NEXT: store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR3]], align 8 --// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB8]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG89:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB8]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG88:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB8]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG88:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB8]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG89:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] // CHECK-DEBUG: omp.par.outlined.exit: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT7_SPLIT:%.*]] // CHECK-DEBUG: omp.par.exit7.split: --// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG93:![0-9]+]] -+// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG92:![0-9]+]] +-// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG92:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG93:![0-9]+]] // CHECK-DEBUG: omp.par.region.parallel.after: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK-DEBUG: omp.par.pre_finalize: --// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16_EXITSTUB:%.*]], !dbg [[DBG93]] -+// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16_EXITSTUB:%.*]], !dbg [[DBG92]] +-// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16_EXITSTUB:%.*]], !dbg [[DBG92]] ++// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16_EXITSTUB:%.*]], !dbg [[DBG93]] // CHECK-DEBUG: omp.par.outlined.exit16.exitStub: // CHECK-DEBUG-NEXT: ret void // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_1Pfid..omp_par --// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR2:%.*]], ptr noalias [[ZERO_ADDR3:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG94:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR2:%.*]], ptr noalias [[ZERO_ADDR3:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG93:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR2:%.*]], ptr noalias [[ZERO_ADDR3:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG93:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR2:%.*]], ptr noalias [[ZERO_ADDR3:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG94:![0-9]+]] { // CHECK-DEBUG-NEXT: omp.par.entry4: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 // CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 @@ -1745,65 +1745,65 @@ - // CHECK-DEBUG-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 + // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION5:%.*]] // CHECK-DEBUG: omp.par.region5: --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META95:![0-9]+]], metadata !DIExpression()), !dbg [[DBG100:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[DBG100]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG101:![0-9]+]] --// CHECK-DEBUG-NEXT: store ptr [[I]], ptr [[TMP2]], align 8, !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED12]], i32 0, i32 0, !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG102:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.2(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG101]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META94:![0-9]+]], metadata !DIExpression()), !dbg 
[[DBG99:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[DBG99]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG100:![0-9]+]] -+// CHECK-DEBUG-NEXT: store ptr [[I]], ptr [[TMP2]], align 8, !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED12]], i32 0, i32 0, !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG101:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.2(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I]], [[META94:![0-9]+]], !DIExpression(), [[META99:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[META99]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG100:![0-9]+]] +-// CHECK-DEBUG-NEXT: store ptr [[I]], ptr [[TMP2]], align 8, !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED12]], i32 0, i32 0, !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG101:![0-9]+]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.2(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG100]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I]], [[META95:![0-9]+]], !DIExpression(), [[META100:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[META100]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG101:![0-9]+]] ++// CHECK-DEBUG-NEXT: store ptr [[I]], ptr [[TMP2]], align 8, !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED12]], i32 0, i32 0, !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG102:![0-9]+]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.2(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG101]] // CHECK-DEBUG: omp_loop.preheader: --// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM14:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB10:[0-9]+]]), !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB10]], i32 [[OMP_GLOBAL_THREAD_NUM14]], i32 34, ptr [[P_LASTITER]], ptr 
[[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG101]] -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM14:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB10:[0-9]+]]), !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB10]], i32 [[OMP_GLOBAL_THREAD_NUM14]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS1:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[TRIP_COUNT_MINUS1]], 1, !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM14:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB10:[0-9]+]]), !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB10]], i32 [[OMP_GLOBAL_THREAD_NUM14]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG100]] ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM14:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB10:[0-9]+]]), !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB10]], i32 [[OMP_GLOBAL_THREAD_NUM14]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG101]] 
++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS1:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[TRIP_COUNT_MINUS1]], 1, !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG101]] // CHECK-DEBUG: omp_loop.header: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG101]] -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG100]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG101]] // CHECK-DEBUG: omp_loop.cond: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP9]], !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG101]] -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP8]], !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP9]], !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG100]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP8]], !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG101]] // CHECK-DEBUG: omp_loop.exit: --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB10]], i32 [[OMP_GLOBAL_THREAD_NUM14]]), !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM15:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB10]]), !dbg [[DBG103:![0-9]+]] --// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB11:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM15]]), !dbg [[DBG103]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG101]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB10]], i32 [[OMP_GLOBAL_THREAD_NUM14]]), !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM15:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB10]]), !dbg [[DBG102:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB11:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM15]]), !dbg [[DBG102]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB10]], i32 [[OMP_GLOBAL_THREAD_NUM14]]), !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM15:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB10]]), !dbg [[DBG102:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB11:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM15]]), !dbg [[DBG102]] +-// CHECK-DEBUG-NEXT: br label 
[[OMP_LOOP_AFTER:%.*]], !dbg [[DBG100]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB10]], i32 [[OMP_GLOBAL_THREAD_NUM14]]), !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM15:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB10]]), !dbg [[DBG103:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB11:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM15]]), !dbg [[DBG103]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG101]] // CHECK-DEBUG: omp_loop.after: --// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION5_PARALLEL_AFTER:%.*]], !dbg [[DBG104:![0-9]+]] -+// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION5_PARALLEL_AFTER:%.*]], !dbg [[DBG103:![0-9]+]] +-// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION5_PARALLEL_AFTER:%.*]], !dbg [[DBG103:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION5_PARALLEL_AFTER:%.*]], !dbg [[DBG104:![0-9]+]] // CHECK-DEBUG: omp.par.region5.parallel.after: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE6:%.*]] // CHECK-DEBUG: omp.par.pre_finalize6: --// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG104]] -+// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG103]] +-// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG103]] ++// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG104]] // CHECK-DEBUG: omp_loop.body: --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP6]], !dbg [[DBG103]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.3(ptr [[I]], i32 [[TMP10]], ptr [[AGG_CAPTURED12]]), !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG105:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double, !dbg [[DBG105]] --// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG107:![0-9]+]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP12]], !dbg [[DBG108:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD]] to float, !dbg [[DBG105]] --// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG109:![0-9]+]] --// CHECK-DEBUG-NEXT: store float [[CONV13]], ptr [[TMP13]], align 4, !dbg [[DBG110:![0-9]+]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG101]] -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP6]], !dbg [[DBG102]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.3(ptr [[I]], i32 [[TMP9]], ptr [[AGG_CAPTURED12]]), !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG104:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP10]] to double, !dbg [[DBG104]] -+// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG106:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP11]], !dbg [[DBG107:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD]] to float, !dbg [[DBG104]] -+// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG108:![0-9]+]] -+// CHECK-DEBUG-NEXT: store float [[CONV13]], ptr [[TMP12]], align 4, !dbg [[DBG109:![0-9]+]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP6]], !dbg [[DBG102]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.3(ptr [[I]], i32 [[TMP10]], ptr [[AGG_CAPTURED12]]), !dbg 
[[DBG100]] +-// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG104:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double, !dbg [[DBG104]] +-// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG106:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP12]], !dbg [[DBG107:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD]] to float, !dbg [[DBG104]] +-// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG108:![0-9]+]] +-// CHECK-DEBUG-NEXT: store float [[CONV13]], ptr [[TMP13]], align 4, !dbg [[DBG109:![0-9]+]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG100]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP6]], !dbg [[DBG103]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.3(ptr [[I]], i32 [[TMP9]], ptr [[AGG_CAPTURED12]]), !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG105:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP10]] to double, !dbg [[DBG105]] ++// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG107:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP11]], !dbg [[DBG108:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD]] to float, !dbg [[DBG105]] ++// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG109:![0-9]+]] ++// CHECK-DEBUG-NEXT: store float [[CONV13]], ptr [[TMP12]], align 4, !dbg [[DBG110:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG101]] // CHECK-DEBUG: omp_loop.inc: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG101]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG101]] -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG100]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG100]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG100]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG101]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG101]] // CHECK-DEBUG: omp.par.outlined.exit.exitStub: // CHECK-DEBUG-NEXT: ret void // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.2 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG111:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG110:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG110:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG111:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -1438,109 +1485,109 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = 
alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META112:![0-9]+]], metadata !DIExpression()), !dbg [[DBG113:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META111:![0-9]+]], metadata !DIExpression()), !dbg [[DBG112:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META111:![0-9]+]], !DIExpression(), [[META112:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META112:![0-9]+]], !DIExpression(), [[META113:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META114:![0-9]+]], metadata !DIExpression()), !dbg [[DBG113]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META113:![0-9]+]], metadata !DIExpression()), !dbg [[DBG112]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META113:![0-9]+]], !DIExpression(), [[META112]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META114:![0-9]+]], !DIExpression(), [[META113]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META115:![0-9]+]], metadata !DIExpression()), !dbg [[DBG117:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG118:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG118]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG118]] --// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG117]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META120:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META122:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG121]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META114:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG117:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG117]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG117]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG116]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META119:![0-9]+]], metadata !DIExpression()), !dbg [[DBG120:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG120]] -+// 
CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META121:![0-9]+]], metadata !DIExpression()), !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG120]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META114:![0-9]+]], !DIExpression(), [[META116:![0-9]+]]) +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG117:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG117]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG117]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META116]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META119:![0-9]+]], !DIExpression(), [[META120:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META120]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META121:![0-9]+]], !DIExpression(), [[META120]]) +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META120]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META120]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META120]] +-// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META120]] +-// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META120]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META115:![0-9]+]], !DIExpression(), [[META117:![0-9]+]]) ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG118:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG118]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG118]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META117]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META120:![0-9]+]], !DIExpression(), [[META121:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META121]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META122:![0-9]+]], !DIExpression(), [[META121]]) ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META121]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META121]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META121]] ++// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META121]] ++// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META121]] // CHECK-DEBUG: cond.true: --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub 
i32 [[TMP8]], 1, !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG121]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG120]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META120]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META120]] +-// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META120]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META120]] +-// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META120]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META120]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META120]] +-// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META120]] +-// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META120]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META121]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META121]] ++// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META121]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META121]] ++// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META121]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META121]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META121]] ++// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META121]] ++// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META121]] // CHECK-DEBUG: cond.false: --// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG121]] -+// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG120]] +-// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META120]] ++// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META121]] // CHECK-DEBUG: cond.end: --// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG121]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG123:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG120]] -+// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg 
[[DBG120]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG122:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META120]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META120]] +-// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META120]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG122:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META121]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META121]] ++// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META121]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG123:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.3 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG125:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG124:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG124:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG125:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META126:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META125:![0-9]+]], metadata !DIExpression()), !dbg [[DBG126:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META125:![0-9]+]], !DIExpression(), [[META126:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META126:![0-9]+]], !DIExpression(), [[META127:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META128:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META127:![0-9]+]], metadata !DIExpression()), !dbg [[DBG126]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META127:![0-9]+]], !DIExpression(), [[META126]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META128:![0-9]+]], !DIExpression(), [[META127]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META129:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META128:![0-9]+]], metadata !DIExpression()), !dbg [[DBG126]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META128:![0-9]+]], 
!DIExpression(), [[META126]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META129:![0-9]+]], !DIExpression(), [[META127]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG130:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG130]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG132:![0-9]+]] --// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG132]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG132]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG132]] --// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG127]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG130]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG129:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG129]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG131:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG131]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG131]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG131]] -+// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG126]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG129]] +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG129:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG129]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG131:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG131]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG131]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG131]] +-// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META126]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG129]] ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG130:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG130]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG132:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG132]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG132]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG132]] ++// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META127]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG130]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_2Pfid --// CHECK-DEBUG-SAME: (ptr noundef [[R:%.*]], i32 noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] !dbg [[DBG133:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef [[R:%.*]], i32 noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] !dbg [[DBG132:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef [[R:%.*]], i32 noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] !dbg [[DBG132:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef 
[[R:%.*]], i32 noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] !dbg [[DBG133:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8 // CHECK-DEBUG-NEXT: [[R_ADDR:%.*]] = alloca ptr, align 8 @@ -1563,16 +1610,16 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND211:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE212:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[R]], ptr [[R_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[R_ADDR]], metadata [[META134:![0-9]+]], metadata !DIExpression()), !dbg [[DBG135:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[R_ADDR]], metadata [[META133:![0-9]+]], metadata !DIExpression()), !dbg [[DBG134:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[R_ADDR]], [[META133:![0-9]+]], !DIExpression(), [[META134:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[R_ADDR]], [[META134:![0-9]+]], !DIExpression(), [[META135:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META136:![0-9]+]], metadata !DIExpression()), !dbg [[DBG137:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META135:![0-9]+]], metadata !DIExpression()), !dbg [[DBG136:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META135:![0-9]+]], !DIExpression(), [[META136:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META136:![0-9]+]], !DIExpression(), [[META137:![0-9]+]]) // CHECK-DEBUG-NEXT: store double [[B]], ptr [[B_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META138:![0-9]+]], metadata !DIExpression()), !dbg [[DBG139:![0-9]+]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB13:[0-9]+]]), !dbg [[DBG140:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META137:![0-9]+]], metadata !DIExpression()), !dbg [[DBG138:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB13:[0-9]+]]), !dbg [[DBG139:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META137:![0-9]+]], !DIExpression(), [[META138:![0-9]+]]) +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB13:[0-9]+]]), !dbg [[DBG139:![0-9]+]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META138:![0-9]+]], !DIExpression(), [[META139:![0-9]+]]) ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB13:[0-9]+]]), !dbg [[DBG140:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 @@ -1580,216 +1627,216 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: store ptr [[B_ADDR]], ptr [[GEP_B_ADDR]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2 // CHECK-DEBUG-NEXT: store ptr [[R_ADDR]], ptr [[GEP_R_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB13]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.23, ptr [[STRUCTARG]]), !dbg [[DBG141:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB13]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.23, ptr [[STRUCTARG]]), !dbg [[DBG140:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184:%.*]] -// CHECK-DEBUG: omp.par.outlined.exit184: -+// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB13]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.23, ptr [[STRUCTARG]]), !dbg [[DBG140:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB13]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.23, ptr [[STRUCTARG]]), !dbg [[DBG141:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT190:%.*]] +// CHECK-DEBUG: omp.par.outlined.exit190: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK-DEBUG: omp.par.exit.split: --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I185]], metadata [[META145:![0-9]+]], metadata !DIExpression()), !dbg [[DBG148:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 0, ptr [[I185]], align 4, !dbg [[DBG148]] --// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[AGG_CAPTURED186]], i32 0, i32 0, !dbg [[DBG149:![0-9]+]] --// CHECK-DEBUG-NEXT: store ptr [[I185]], ptr [[TMP0]], align 8, !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[AGG_CAPTURED187]], i32 0, i32 0, !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[I185]], align 4, !dbg [[DBG150:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4, !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.19(ptr [[DOTCOUNT_ADDR188]], ptr [[AGG_CAPTURED186]]), !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: [[DOTCOUNT189:%.*]] = load i32, ptr [[DOTCOUNT_ADDR188]], align 4, !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER190:%.*]], !dbg [[DBG149]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I185]], [[META144:![0-9]+]], !DIExpression(), [[META147:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[I185]], align 4, !dbg [[META147]] +-// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_17]], ptr [[AGG_CAPTURED186]], i32 0, i32 0, !dbg [[DBG148:![0-9]+]] +-// CHECK-DEBUG-NEXT: store ptr [[I185]], ptr [[TMP0]], align 8, !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_18]], ptr [[AGG_CAPTURED187]], i32 0, i32 0, !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[I185]], align 4, !dbg [[DBG149:![0-9]+]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4, !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.19(ptr [[DOTCOUNT_ADDR188]], ptr [[AGG_CAPTURED186]]), !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: [[DOTCOUNT189:%.*]] = load i32, ptr [[DOTCOUNT_ADDR188]], align 4, !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER190:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.preheader190: --// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND204]], align 4, !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[P_UPPERBOUND205]], align 4, !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE206]], align 4, !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB42:[0-9]+]]), !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, ptr [[P_LASTITER203]], ptr [[P_LOWERBOUND204]], ptr [[P_UPPERBOUND205]], ptr [[P_STRIDE206]], i32 1, i32 0), !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[P_LOWERBOUND204]], align 4, !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[P_UPPERBOUND205]], align 4, !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191:%.*]], !dbg [[DBG149]] +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE206]], align 4, !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB42:[0-9]+]]), !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, ptr [[P_LASTITER203]], ptr [[P_LOWERBOUND204]], ptr [[P_UPPERBOUND205]], ptr [[P_STRIDE206]], i32 1, i32 0), !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.header191: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ], !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND192:%.*]], !dbg [[DBG149]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ], !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND192:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.cond192: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]], !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]], !dbg [[DBG149]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]], !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.body193: --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], !dbg [[DBG151:![0-9]+]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.20(ptr [[I185]], i32 [[TMP8]], ptr [[AGG_CAPTURED187]]), !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG152:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double, !dbg [[DBG152]] --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load double, ptr [[B_ADDR]], align 8, !dbg [[DBG151]] --// CHECK-DEBUG-NEXT: [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]], !dbg [[DBG153:![0-9]+]] --// 
CHECK-DEBUG-NEXT: [[CONV202:%.*]] = fptrunc double [[ADD201]] to float, !dbg [[DBG152]] --// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load ptr, ptr [[R_ADDR]], align 8, !dbg [[DBG154:![0-9]+]] --// CHECK-DEBUG-NEXT: store float [[CONV202]], ptr [[TMP11]], align 4, !dbg [[DBG155:![0-9]+]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC194]], !dbg [[DBG149]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], !dbg [[DBG150:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.20(ptr [[I185]], i32 [[TMP8]], ptr [[AGG_CAPTURED187]]), !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG151:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double, !dbg [[DBG151]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load double, ptr [[B_ADDR]], align 8, !dbg [[DBG150]] +-// CHECK-DEBUG-NEXT: [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]], !dbg [[DBG152:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV202:%.*]] = fptrunc double [[ADD201]] to float, !dbg [[DBG151]] +-// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load ptr, ptr [[R_ADDR]], align 8, !dbg [[DBG153:![0-9]+]] +-// CHECK-DEBUG-NEXT: store float [[CONV202]], ptr [[TMP11]], align 4, !dbg [[DBG154:![0-9]+]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC194]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.inc194: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1, !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191]], !dbg [[DBG149]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1, !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.exit195: --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]]), !dbg [[DBG149]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB42]]), !dbg [[DBG151]] --// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]), !dbg [[DBG151]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER196:%.*]], !dbg [[DBG149]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]]), !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB42]]), !dbg [[DBG150]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]), !dbg [[DBG150]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER196:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.after196: --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG156:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I191]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[I191]], align 4, !dbg [[DBG147]] -+// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[AGG_CAPTURED192]], i32 0, i32 0, !dbg [[DBG148:![0-9]+]] -+// CHECK-DEBUG-NEXT: store ptr [[I191]], ptr [[TMP0]], align 8, !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[AGG_CAPTURED193]], i32 0, i32 0, !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[I191]], align 4, !dbg [[DBG149:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4, !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.19(ptr 
[[DOTCOUNT_ADDR194]], ptr [[AGG_CAPTURED192]]), !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: [[DOTCOUNT195:%.*]] = load i32, ptr [[DOTCOUNT_ADDR194]], align 4, !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER196:%.*]], !dbg [[DBG148]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG155:![0-9]+]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I191]], [[META145:![0-9]+]], !DIExpression(), [[META148:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[I191]], align 4, !dbg [[META148]] ++// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_17]], ptr [[AGG_CAPTURED192]], i32 0, i32 0, !dbg [[DBG149:![0-9]+]] ++// CHECK-DEBUG-NEXT: store ptr [[I191]], ptr [[TMP0]], align 8, !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_18]], ptr [[AGG_CAPTURED193]], i32 0, i32 0, !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[I191]], align 4, !dbg [[DBG150:![0-9]+]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4, !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.19(ptr [[DOTCOUNT_ADDR194]], ptr [[AGG_CAPTURED192]]), !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: [[DOTCOUNT195:%.*]] = load i32, ptr [[DOTCOUNT_ADDR194]], align 4, !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER196:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG: omp_loop.preheader196: -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND210]], align 4, !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT195]], 1, !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[P_UPPERBOUND211]], align 4, !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE212]], align 4, !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM213:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB42:[0-9]+]]), !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM213]], i32 34, ptr [[P_LASTITER209]], ptr [[P_LOWERBOUND210]], ptr [[P_UPPERBOUND211]], ptr [[P_STRIDE212]], i32 1, i32 0), !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[P_LOWERBOUND210]], align 4, !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[P_UPPERBOUND211]], align 4, !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS1214:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = add i32 [[TRIP_COUNT_MINUS1214]], 1, !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER197:%.*]], !dbg [[DBG148]] ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND210]], align 4, !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT195]], 1, !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[P_UPPERBOUND211]], align 4, !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE212]], align 4, !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM213:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB42:[0-9]+]]), !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM213]], i32 34, ptr [[P_LASTITER209]], ptr [[P_LOWERBOUND210]], ptr [[P_UPPERBOUND211]], ptr [[P_STRIDE212]], i32 1, i32 0), !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[P_LOWERBOUND210]], align 4, !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[P_UPPERBOUND211]], align 4, !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS1214:%.*]] = sub i32 [[TMP5]], 
[[TMP4]], !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = add i32 [[TRIP_COUNT_MINUS1214]], 1, !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER197:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG: omp_loop.header197: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV203:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER196]] ], [ [[OMP_LOOP_NEXT205:%.*]], [[OMP_LOOP_INC200:%.*]] ], !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND198:%.*]], !dbg [[DBG148]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV203:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER196]] ], [ [[OMP_LOOP_NEXT205:%.*]], [[OMP_LOOP_INC200:%.*]] ], !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND198:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG: omp_loop.cond198: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP204:%.*]] = icmp ult i32 [[OMP_LOOP_IV203]], [[TMP6]], !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP204]], label [[OMP_LOOP_BODY199:%.*]], label [[OMP_LOOP_EXIT201:%.*]], !dbg [[DBG148]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP204:%.*]] = icmp ult i32 [[OMP_LOOP_IV203]], [[TMP6]], !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP204]], label [[OMP_LOOP_BODY199:%.*]], label [[OMP_LOOP_EXIT201:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG: omp_loop.body199: -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[OMP_LOOP_IV203]], [[TMP4]], !dbg [[DBG150:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.20(ptr [[I191]], i32 [[TMP7]], ptr [[AGG_CAPTURED193]]), !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG151:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV206:%.*]] = sitofp i32 [[TMP8]] to double, !dbg [[DBG151]] -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load double, ptr [[B_ADDR]], align 8, !dbg [[DBG150]] -+// CHECK-DEBUG-NEXT: [[ADD207:%.*]] = fadd double [[CONV206]], [[TMP9]], !dbg [[DBG152:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV208:%.*]] = fptrunc double [[ADD207]] to float, !dbg [[DBG151]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[R_ADDR]], align 8, !dbg [[DBG153:![0-9]+]] -+// CHECK-DEBUG-NEXT: store float [[CONV208]], ptr [[TMP10]], align 4, !dbg [[DBG154:![0-9]+]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC200]], !dbg [[DBG148]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[OMP_LOOP_IV203]], [[TMP4]], !dbg [[DBG151:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.20(ptr [[I191]], i32 [[TMP7]], ptr [[AGG_CAPTURED193]]), !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG152:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV206:%.*]] = sitofp i32 [[TMP8]] to double, !dbg [[DBG152]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load double, ptr [[B_ADDR]], align 8, !dbg [[DBG151]] ++// CHECK-DEBUG-NEXT: [[ADD207:%.*]] = fadd double [[CONV206]], [[TMP9]], !dbg [[DBG153:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV208:%.*]] = fptrunc double [[ADD207]] to float, !dbg [[DBG152]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[R_ADDR]], align 8, !dbg [[DBG154:![0-9]+]] ++// CHECK-DEBUG-NEXT: store float [[CONV208]], ptr [[TMP10]], align 4, !dbg [[DBG155:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC200]], !dbg [[DBG149]] +// CHECK-DEBUG: omp_loop.inc200: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT205]] = add nuw i32 [[OMP_LOOP_IV203]], 1, !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER197]], !dbg [[DBG148]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT205]] = add nuw i32 [[OMP_LOOP_IV203]], 1, !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER197]], !dbg [[DBG149]] +// CHECK-DEBUG: 
omp_loop.exit201: -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM213]]), !dbg [[DBG148]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM215:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB42]]), !dbg [[DBG150]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM215]]), !dbg [[DBG150]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER202:%.*]], !dbg [[DBG148]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM213]]), !dbg [[DBG149]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM215:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB42]]), !dbg [[DBG151]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM215]]), !dbg [[DBG151]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER202:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG: omp_loop.after202: -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG155:![0-9]+]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG156:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_2Pfid..omp_par.23 --// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG157:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG156:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG156:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG157:![0-9]+]] { // CHECK-DEBUG-NEXT: omp.par.entry: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 // CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 -@@ -1965,7 +1965,7 @@ +@@ -1965,11 +1965,11 @@ // CHECK-DEBUG-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[STRUCTARG214:%.*]] = alloca { ptr, ptr, ptr }, align 8 +-// CHECK-DEBUG-NEXT: [[P_LASTITER178:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_LOWERBOUND179:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_UPPERBOUND180:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_STRIDE181:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[STRUCTARG221:%.*]] = alloca { ptr, ptr, ptr }, align 8 - // CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 - // CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 - // CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 -@@ -1978,137 +1978,137 @@ ++// CHECK-DEBUG-NEXT: [[P_LASTITER183:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_LOWERBOUND184:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_UPPERBOUND185:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_STRIDE186:%.*]] = alloca i32, align 4 + // CHECK-DEBUG-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 - // CHECK-DEBUG-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 +@@ -1982,133 +1982,133 @@ + // CHECK-DEBUG-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 
8 + // CHECK-DEBUG-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 + // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[I160:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED161:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED162:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 -// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR163:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_LASTITER178:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_LOWERBOUND179:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_UPPERBOUND180:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_STRIDE181:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[I165:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED166:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED167:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 +// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR168:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_LASTITER183:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_LOWERBOUND184:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_UPPERBOUND185:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_STRIDE186:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK-DEBUG: omp.par.region: --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META158:![0-9]+]], metadata !DIExpression()), !dbg [[DBG162:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[DBG162]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG163:![0-9]+]] --// CHECK-DEBUG-NEXT: store ptr [[I]], ptr [[TMP2]], align 8, !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[AGG_CAPTURED1]], i32 0, i32 0, !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG164:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.5(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG163]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META157:![0-9]+]], metadata !DIExpression()), !dbg [[DBG161:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[DBG161]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG162:![0-9]+]] -+// CHECK-DEBUG-NEXT: store ptr [[I]], ptr [[TMP2]], align 8, !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[AGG_CAPTURED1]], i32 0, i32 0, !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG163:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.5(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I]], [[META157:![0-9]+]], !DIExpression(), 
[[META161:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[META161]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_3]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG162:![0-9]+]] +-// CHECK-DEBUG-NEXT: store ptr [[I]], ptr [[TMP2]], align 8, !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_4]], ptr [[AGG_CAPTURED1]], i32 0, i32 0, !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG163:![0-9]+]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.5(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG162]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I]], [[META158:![0-9]+]], !DIExpression(), [[META162:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[META162]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_3]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG163:![0-9]+]] ++// CHECK-DEBUG-NEXT: store ptr [[I]], ptr [[TMP2]], align 8, !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_4]], ptr [[AGG_CAPTURED1]], i32 0, i32 0, !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG164:![0-9]+]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.5(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG163]] // CHECK-DEBUG: omp_loop.preheader: --// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]]), !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG163]] -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]]), !dbg [[DBG162]] -+// 
CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS1:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[TRIP_COUNT_MINUS1]], 1, !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]]), !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG162]] ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]]), !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS1:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[TRIP_COUNT_MINUS1]], 1, !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG163]] // CHECK-DEBUG: omp_loop.header: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG163]] -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG162]] +-// 
CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG162]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG163]] // CHECK-DEBUG: omp_loop.cond: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP9]], !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG163]] -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP8]], !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP9]], !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG162]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP8]], !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG163]] // CHECK-DEBUG: omp_loop.exit: --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]), !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15]]), !dbg [[DBG165:![0-9]+]] --// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB16:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]), !dbg [[DBG165]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG163]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]), !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15]]), !dbg [[DBG164:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB16:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]), !dbg [[DBG164]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]), !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15]]), !dbg [[DBG164:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB16:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]), !dbg [[DBG164]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG162]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]), !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15]]), !dbg [[DBG165:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB16:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]), !dbg [[DBG165]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG163]] // CHECK-DEBUG: omp_loop.after: --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB18:[0-9]+]]), !dbg [[DBG166:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB18:[0-9]+]]), !dbg [[DBG165:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB18:[0-9]+]]), !dbg [[DBG165:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call 
i32 @__kmpc_global_thread_num(ptr @[[GLOB18:[0-9]+]]), !dbg [[DBG166:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: -// CHECK-DEBUG-NEXT: [[GEP_A_ADDR215:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG214]], i32 0, i32 0 @@ -1798,7 +1845,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest -// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_B_ADDR]], ptr [[GEP_B_ADDR216]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_R_ADDR217:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG214]], i32 0, i32 2 -// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR217]], align 8 --// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB18]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.22, ptr [[STRUCTARG214]]), !dbg [[DBG167:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB18]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.22, ptr [[STRUCTARG214]]), !dbg [[DBG166:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT159:%.*]] -// CHECK-DEBUG: omp.par.outlined.exit159: +// CHECK-DEBUG-NEXT: [[GEP_A_ADDR222:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG221]], i32 0, i32 0 @@ -1807,263 +1854,264 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest +// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_B_ADDR]], ptr [[GEP_B_ADDR223]], align 8 +// CHECK-DEBUG-NEXT: [[GEP_R_ADDR224:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG221]], i32 0, i32 2 +// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR224]], align 8 -+// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB18]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.22, ptr [[STRUCTARG221]]), !dbg [[DBG166:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB18]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.22, ptr [[STRUCTARG221]]), !dbg [[DBG167:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT164:%.*]] +// CHECK-DEBUG: omp.par.outlined.exit164: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT11_SPLIT:%.*]] // CHECK-DEBUG: omp.par.exit11.split: --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I160]], metadata [[META171:![0-9]+]], metadata !DIExpression()), !dbg [[DBG174:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 0, ptr [[I160]], align 4, !dbg [[DBG174]] --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[AGG_CAPTURED161]], i32 0, i32 0, !dbg [[DBG175:![0-9]+]] --// CHECK-DEBUG-NEXT: store ptr [[I160]], ptr [[TMP10]], align 8, !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[AGG_CAPTURED162]], i32 0, i32 0, !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load i32, ptr [[I160]], align 4, !dbg [[DBG176:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4, !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.17(ptr [[DOTCOUNT_ADDR163]], ptr [[AGG_CAPTURED161]]), !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: [[DOTCOUNT164:%.*]] = load i32, ptr [[DOTCOUNT_ADDR163]], align 4, !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER165:%.*]], !dbg [[DBG175]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I160]], [[META170:![0-9]+]], !DIExpression(), [[META173:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[I160]], align 4, !dbg [[META173]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_15]], ptr [[AGG_CAPTURED161]], i32 0, i32 0, !dbg [[DBG174:![0-9]+]] +-// CHECK-DEBUG-NEXT: store ptr [[I160]], ptr [[TMP10]], align 8, !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_16]], ptr [[AGG_CAPTURED162]], i32 0, i32 0, !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load i32, ptr [[I160]], align 4, !dbg [[DBG175:![0-9]+]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4, !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.17(ptr [[DOTCOUNT_ADDR163]], ptr [[AGG_CAPTURED161]]), !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: [[DOTCOUNT164:%.*]] = load i32, ptr [[DOTCOUNT_ADDR163]], align 4, !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER165:%.*]], !dbg [[DBG174]] -// CHECK-DEBUG: omp_loop.preheader165: --// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND179]], align 4, !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = sub i32 [[DOTCOUNT164]], 1, !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: store i32 [[TMP13]], ptr [[P_UPPERBOUND180]], align 4, !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE181]], align 4, !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM182:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB39:[0-9]+]]), !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB39]], i32 [[OMP_GLOBAL_THREAD_NUM182]], i32 34, ptr [[P_LASTITER178]], ptr [[P_LOWERBOUND179]], ptr [[P_UPPERBOUND180]], ptr [[P_STRIDE181]], i32 1, i32 0), !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: [[TMP14:%.*]] = load i32, ptr [[P_LOWERBOUND179]], align 4, !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: [[TMP15:%.*]] = load i32, ptr [[P_UPPERBOUND180]], align 4, !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], [[TMP14]], !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: [[TMP17:%.*]] = add i32 
[[TMP16]], 1, !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER166:%.*]], !dbg [[DBG175]] +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND179]], align 4, !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = sub i32 [[DOTCOUNT164]], 1, !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP13]], ptr [[P_UPPERBOUND180]], align 4, !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE181]], align 4, !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM182:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB39:[0-9]+]]), !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB39]], i32 [[OMP_GLOBAL_THREAD_NUM182]], i32 34, ptr [[P_LASTITER178]], ptr [[P_LOWERBOUND179]], ptr [[P_UPPERBOUND180]], ptr [[P_STRIDE181]], i32 1, i32 0), !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: [[TMP14:%.*]] = load i32, ptr [[P_LOWERBOUND179]], align 4, !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: [[TMP15:%.*]] = load i32, ptr [[P_UPPERBOUND180]], align 4, !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], [[TMP14]], !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], 1, !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER166:%.*]], !dbg [[DBG174]] -// CHECK-DEBUG: omp_loop.header166: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV172:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER165]] ], [ [[OMP_LOOP_NEXT174:%.*]], [[OMP_LOOP_INC169:%.*]] ], !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND167:%.*]], !dbg [[DBG175]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV172:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER165]] ], [ [[OMP_LOOP_NEXT174:%.*]], [[OMP_LOOP_INC169:%.*]] ], !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND167:%.*]], !dbg [[DBG174]] -// CHECK-DEBUG: omp_loop.cond167: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP173:%.*]] = icmp ult i32 [[OMP_LOOP_IV172]], [[TMP17]], !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP173]], label [[OMP_LOOP_BODY168:%.*]], label [[OMP_LOOP_EXIT170:%.*]], !dbg [[DBG175]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP173:%.*]] = icmp ult i32 [[OMP_LOOP_IV172]], [[TMP17]], !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP173]], label [[OMP_LOOP_BODY168:%.*]], label [[OMP_LOOP_EXIT170:%.*]], !dbg [[DBG174]] -// CHECK-DEBUG: omp_loop.exit170: --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB39]], i32 [[OMP_GLOBAL_THREAD_NUM182]]), !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM183:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB39]]), !dbg [[DBG177:![0-9]+]] --// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB40:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM183]]), !dbg [[DBG177]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER171:%.*]], !dbg [[DBG175]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB39]], i32 [[OMP_GLOBAL_THREAD_NUM182]]), !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM183:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB39]]), !dbg [[DBG176:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB40:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM183]]), !dbg [[DBG176]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER171:%.*]], !dbg [[DBG174]] -// CHECK-DEBUG: omp_loop.after171: --// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG178:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I165]], metadata [[META170:![0-9]+]], metadata !DIExpression()), !dbg [[DBG173:![0-9]+]] -+// CHECK-DEBUG-NEXT: store 
i32 0, ptr [[I165]], align 4, !dbg [[DBG173]] -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[AGG_CAPTURED166]], i32 0, i32 0, !dbg [[DBG174:![0-9]+]] -+// CHECK-DEBUG-NEXT: store ptr [[I165]], ptr [[TMP9]], align 8, !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[AGG_CAPTURED167]], i32 0, i32 0, !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[I165]], align 4, !dbg [[DBG175:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4, !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.17(ptr [[DOTCOUNT_ADDR168]], ptr [[AGG_CAPTURED166]]), !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: [[DOTCOUNT169:%.*]] = load i32, ptr [[DOTCOUNT_ADDR168]], align 4, !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER170:%.*]], !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG177:![0-9]+]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I165]], [[META171:![0-9]+]], !DIExpression(), [[META174:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[I165]], align 4, !dbg [[META174]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_15]], ptr [[AGG_CAPTURED166]], i32 0, i32 0, !dbg [[DBG175:![0-9]+]] ++// CHECK-DEBUG-NEXT: store ptr [[I165]], ptr [[TMP9]], align 8, !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_16]], ptr [[AGG_CAPTURED167]], i32 0, i32 0, !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[I165]], align 4, !dbg [[DBG176:![0-9]+]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4, !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.17(ptr [[DOTCOUNT_ADDR168]], ptr [[AGG_CAPTURED166]]), !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: [[DOTCOUNT169:%.*]] = load i32, ptr [[DOTCOUNT_ADDR168]], align 4, !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER170:%.*]], !dbg [[DBG175]] +// CHECK-DEBUG: omp_loop.preheader170: -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND184]], align 4, !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = sub i32 [[DOTCOUNT169]], 1, !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP12]], ptr [[P_UPPERBOUND185]], align 4, !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE186]], align 4, !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM187:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB39:[0-9]+]]), !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB39]], i32 [[OMP_GLOBAL_THREAD_NUM187]], i32 34, ptr [[P_LASTITER183]], ptr [[P_LOWERBOUND184]], ptr [[P_UPPERBOUND185]], ptr [[P_STRIDE186]], i32 1, i32 0), !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load i32, ptr [[P_LOWERBOUND184]], align 4, !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: [[TMP14:%.*]] = load i32, ptr [[P_UPPERBOUND185]], align 4, !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS1188:%.*]] = sub i32 [[TMP14]], [[TMP13]], !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: [[TMP15:%.*]] = add i32 [[TRIP_COUNT_MINUS1188]], 1, !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER171:%.*]], !dbg [[DBG174]] ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND184]], align 4, !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = sub i32 [[DOTCOUNT169]], 1, !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP12]], ptr [[P_UPPERBOUND185]], align 4, !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: store 
i32 1, ptr [[P_STRIDE186]], align 4, !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM187:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB39:[0-9]+]]), !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB39]], i32 [[OMP_GLOBAL_THREAD_NUM187]], i32 34, ptr [[P_LASTITER183]], ptr [[P_LOWERBOUND184]], ptr [[P_UPPERBOUND185]], ptr [[P_STRIDE186]], i32 1, i32 0), !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load i32, ptr [[P_LOWERBOUND184]], align 4, !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: [[TMP14:%.*]] = load i32, ptr [[P_UPPERBOUND185]], align 4, !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS1188:%.*]] = sub i32 [[TMP14]], [[TMP13]], !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: [[TMP15:%.*]] = add i32 [[TRIP_COUNT_MINUS1188]], 1, !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER171:%.*]], !dbg [[DBG175]] +// CHECK-DEBUG: omp_loop.header171: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV177:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER170]] ], [ [[OMP_LOOP_NEXT179:%.*]], [[OMP_LOOP_INC174:%.*]] ], !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND172:%.*]], !dbg [[DBG174]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV177:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER170]] ], [ [[OMP_LOOP_NEXT179:%.*]], [[OMP_LOOP_INC174:%.*]] ], !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND172:%.*]], !dbg [[DBG175]] +// CHECK-DEBUG: omp_loop.cond172: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP178:%.*]] = icmp ult i32 [[OMP_LOOP_IV177]], [[TMP15]], !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP178]], label [[OMP_LOOP_BODY173:%.*]], label [[OMP_LOOP_EXIT175:%.*]], !dbg [[DBG174]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP178:%.*]] = icmp ult i32 [[OMP_LOOP_IV177]], [[TMP15]], !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP178]], label [[OMP_LOOP_BODY173:%.*]], label [[OMP_LOOP_EXIT175:%.*]], !dbg [[DBG175]] +// CHECK-DEBUG: omp_loop.exit175: -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB39]], i32 [[OMP_GLOBAL_THREAD_NUM187]]), !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM189:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB39]]), !dbg [[DBG176:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB40:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM189]]), !dbg [[DBG176]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER176:%.*]], !dbg [[DBG174]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB39]], i32 [[OMP_GLOBAL_THREAD_NUM187]]), !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM189:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB39]]), !dbg [[DBG177:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB40:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM189]]), !dbg [[DBG177]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER176:%.*]], !dbg [[DBG175]] +// CHECK-DEBUG: omp_loop.after176: -+// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG177:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG178:![0-9]+]] // CHECK-DEBUG: omp.par.region.parallel.after: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK-DEBUG: omp.par.pre_finalize: --// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184_EXITSTUB:%.*]], !dbg [[DBG178]] +-// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184_EXITSTUB:%.*]], !dbg [[DBG177]] -// CHECK-DEBUG: omp_loop.body168: --// CHECK-DEBUG-NEXT: [[TMP18:%.*]] = add i32 [[OMP_LOOP_IV172]], [[TMP14]], !dbg [[DBG177]] 
--// CHECK-DEBUG-NEXT: call void @__captured_stmt.18(ptr [[I160]], i32 [[TMP18]], ptr [[AGG_CAPTURED162]]), !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: [[TMP19:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG179:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV175:%.*]] = sitofp i32 [[TMP19]] to double, !dbg [[DBG179]] --// CHECK-DEBUG-NEXT: [[TMP20:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG177]] --// CHECK-DEBUG-NEXT: [[ADD176:%.*]] = fadd double [[CONV175]], [[TMP20]], !dbg [[DBG180:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV177:%.*]] = fptrunc double [[ADD176]] to float, !dbg [[DBG179]] --// CHECK-DEBUG-NEXT: [[TMP21:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG181:![0-9]+]] --// CHECK-DEBUG-NEXT: store float [[CONV177]], ptr [[TMP21]], align 4, !dbg [[DBG182:![0-9]+]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC169]], !dbg [[DBG175]] +-// CHECK-DEBUG-NEXT: [[TMP18:%.*]] = add i32 [[OMP_LOOP_IV172]], [[TMP14]], !dbg [[DBG176]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.18(ptr [[I160]], i32 [[TMP18]], ptr [[AGG_CAPTURED162]]), !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: [[TMP19:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG178:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV175:%.*]] = sitofp i32 [[TMP19]] to double, !dbg [[DBG178]] +-// CHECK-DEBUG-NEXT: [[TMP20:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG176]] +-// CHECK-DEBUG-NEXT: [[ADD176:%.*]] = fadd double [[CONV175]], [[TMP20]], !dbg [[DBG179:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV177:%.*]] = fptrunc double [[ADD176]] to float, !dbg [[DBG178]] +-// CHECK-DEBUG-NEXT: [[TMP21:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG180:![0-9]+]] +-// CHECK-DEBUG-NEXT: store float [[CONV177]], ptr [[TMP21]], align 4, !dbg [[DBG181:![0-9]+]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC169]], !dbg [[DBG174]] -// CHECK-DEBUG: omp_loop.inc169: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT174]] = add nuw i32 [[OMP_LOOP_IV172]], 1, !dbg [[DBG175]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER166]], !dbg [[DBG175]] -+// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT190_EXITSTUB:%.*]], !dbg [[DBG177]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT174]] = add nuw i32 [[OMP_LOOP_IV172]], 1, !dbg [[DBG174]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER166]], !dbg [[DBG174]] ++// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT190_EXITSTUB:%.*]], !dbg [[DBG178]] +// CHECK-DEBUG: omp_loop.body173: -+// CHECK-DEBUG-NEXT: [[TMP16:%.*]] = add i32 [[OMP_LOOP_IV177]], [[TMP13]], !dbg [[DBG176]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.18(ptr [[I165]], i32 [[TMP16]], ptr [[AGG_CAPTURED167]]), !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: [[TMP17:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG178:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV180:%.*]] = sitofp i32 [[TMP17]] to double, !dbg [[DBG178]] -+// CHECK-DEBUG-NEXT: [[TMP18:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG176]] -+// CHECK-DEBUG-NEXT: [[ADD181:%.*]] = fadd double [[CONV180]], [[TMP18]], !dbg [[DBG179:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV182:%.*]] = fptrunc double [[ADD181]] to float, !dbg [[DBG178]] -+// CHECK-DEBUG-NEXT: [[TMP19:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG180:![0-9]+]] -+// CHECK-DEBUG-NEXT: store float [[CONV182]], ptr [[TMP19]], align 4, !dbg [[DBG181:![0-9]+]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC174]], !dbg [[DBG174]] ++// CHECK-DEBUG-NEXT: [[TMP16:%.*]] = add i32 [[OMP_LOOP_IV177]], [[TMP13]], !dbg [[DBG177]] ++// CHECK-DEBUG-NEXT: call 
void @__captured_stmt.18(ptr [[I165]], i32 [[TMP16]], ptr [[AGG_CAPTURED167]]), !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: [[TMP17:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG179:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV180:%.*]] = sitofp i32 [[TMP17]] to double, !dbg [[DBG179]] ++// CHECK-DEBUG-NEXT: [[TMP18:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG177]] ++// CHECK-DEBUG-NEXT: [[ADD181:%.*]] = fadd double [[CONV180]], [[TMP18]], !dbg [[DBG180:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV182:%.*]] = fptrunc double [[ADD181]] to float, !dbg [[DBG179]] ++// CHECK-DEBUG-NEXT: [[TMP19:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG181:![0-9]+]] ++// CHECK-DEBUG-NEXT: store float [[CONV182]], ptr [[TMP19]], align 4, !dbg [[DBG182:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC174]], !dbg [[DBG175]] +// CHECK-DEBUG: omp_loop.inc174: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT179]] = add nuw i32 [[OMP_LOOP_IV177]], 1, !dbg [[DBG174]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER171]], !dbg [[DBG174]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT179]] = add nuw i32 [[OMP_LOOP_IV177]], 1, !dbg [[DBG175]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER171]], !dbg [[DBG175]] // CHECK-DEBUG: omp_loop.body: --// CHECK-DEBUG-NEXT: [[TMP22:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP6]], !dbg [[DBG165]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.6(ptr [[I]], i32 [[TMP22]], ptr [[AGG_CAPTURED1]]), !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: [[TMP23:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG183:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP23]] to double, !dbg [[DBG183]] --// CHECK-DEBUG-NEXT: [[TMP24:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG165]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP24]], !dbg [[DBG184:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD]] to float, !dbg [[DBG183]] --// CHECK-DEBUG-NEXT: [[TMP25:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG185:![0-9]+]] --// CHECK-DEBUG-NEXT: store float [[CONV2]], ptr [[TMP25]], align 4, !dbg [[DBG186:![0-9]+]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG163]] -+// CHECK-DEBUG-NEXT: [[TMP20:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP6]], !dbg [[DBG164]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.6(ptr [[I]], i32 [[TMP20]], ptr [[AGG_CAPTURED1]]), !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: [[TMP21:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG182:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP21]] to double, !dbg [[DBG182]] -+// CHECK-DEBUG-NEXT: [[TMP22:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG164]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP22]], !dbg [[DBG183:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD]] to float, !dbg [[DBG182]] -+// CHECK-DEBUG-NEXT: [[TMP23:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG184:![0-9]+]] -+// CHECK-DEBUG-NEXT: store float [[CONV2]], ptr [[TMP23]], align 4, !dbg [[DBG185:![0-9]+]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: [[TMP22:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP6]], !dbg [[DBG164]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.6(ptr [[I]], i32 [[TMP22]], ptr [[AGG_CAPTURED1]]), !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: [[TMP23:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG182:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP23]] to double, !dbg 
[[DBG182]] +-// CHECK-DEBUG-NEXT: [[TMP24:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG164]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP24]], !dbg [[DBG183:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD]] to float, !dbg [[DBG182]] +-// CHECK-DEBUG-NEXT: [[TMP25:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG184:![0-9]+]] +-// CHECK-DEBUG-NEXT: store float [[CONV2]], ptr [[TMP25]], align 4, !dbg [[DBG185:![0-9]+]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG162]] ++// CHECK-DEBUG-NEXT: [[TMP20:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP6]], !dbg [[DBG165]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.6(ptr [[I]], i32 [[TMP20]], ptr [[AGG_CAPTURED1]]), !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: [[TMP21:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG183:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP21]] to double, !dbg [[DBG183]] ++// CHECK-DEBUG-NEXT: [[TMP22:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG165]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP22]], !dbg [[DBG184:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD]] to float, !dbg [[DBG183]] ++// CHECK-DEBUG-NEXT: [[TMP23:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG185:![0-9]+]] ++// CHECK-DEBUG-NEXT: store float [[CONV2]], ptr [[TMP23]], align 4, !dbg [[DBG186:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG163]] // CHECK-DEBUG: omp_loop.inc: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG163]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG163]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG162]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG162]] -// CHECK-DEBUG: omp.par.outlined.exit184.exitStub: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG162]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG162]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG163]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG163]] +// CHECK-DEBUG: omp.par.outlined.exit190.exitStub: // CHECK-DEBUG-NEXT: ret void // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_2Pfid..omp_par.22 --// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR6:%.*]], ptr noalias [[ZERO_ADDR7:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG187:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR6:%.*]], ptr noalias [[ZERO_ADDR7:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG186:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR6:%.*]], ptr noalias [[ZERO_ADDR7:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG186:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR6:%.*]], ptr noalias [[ZERO_ADDR7:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG187:![0-9]+]] { // CHECK-DEBUG-NEXT: omp.par.entry8: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 // CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 -@@ -2116,7 +2116,7 @@ +@@ -2116,16 +2116,16 @@ // CHECK-DEBUG-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[STRUCTARG209:%.*]] = alloca 
{ ptr, ptr, ptr }, align 8 +// CHECK-DEBUG-NEXT: [[STRUCTARG216:%.*]] = alloca { ptr, ptr, ptr }, align 8 // CHECK-DEBUG-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8 - // CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL12:%.*]] = alloca i32, align 4 - // CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR6]], align 4 -@@ -2130,59 +2130,59 @@ +-// CHECK-DEBUG-NEXT: [[P_LASTITER153:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_LOWERBOUND154:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_UPPERBOUND155:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_STRIDE156:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_LASTITER93:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_LOWERBOUND94:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_UPPERBOUND95:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_STRIDE96:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_LASTITER157:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_LOWERBOUND158:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_UPPERBOUND159:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_STRIDE160:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_LASTITER95:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_LOWERBOUND96:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_UPPERBOUND97:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_STRIDE98:%.*]] = alloca i32, align 4 + // CHECK-DEBUG-NEXT: [[P_LASTITER34:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[P_LOWERBOUND35:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[P_UPPERBOUND36:%.*]] = alloca i32, align 4 - // CHECK-DEBUG-NEXT: [[P_STRIDE37:%.*]] = alloca i32, align 4 +@@ -2138,51 +2138,51 @@ + // CHECK-DEBUG-NEXT: [[AGG_CAPTURED17:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 + // CHECK-DEBUG-NEXT: [[AGG_CAPTURED18:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 + // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR19:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[I75:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED76:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED77:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 -// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR78:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_LASTITER93:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_LOWERBOUND94:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_UPPERBOUND95:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_STRIDE96:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[I135:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED136:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED137:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 -// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR138:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_LASTITER153:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_LOWERBOUND154:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_UPPERBOUND155:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_STRIDE156:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[I77:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED78:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED79:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 +// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR80:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_LASTITER95:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_LOWERBOUND96:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: 
[[P_UPPERBOUND97:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_STRIDE98:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[I139:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED140:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED141:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 +// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR142:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_LASTITER157:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_LOWERBOUND158:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_UPPERBOUND159:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_STRIDE160:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION9:%.*]] // CHECK-DEBUG: omp.par.region9: --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I16]], metadata [[META188:![0-9]+]], metadata !DIExpression()), !dbg [[DBG193:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 0, ptr [[I16]], align 4, !dbg [[DBG193]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED17]], i32 0, i32 0, !dbg [[DBG194:![0-9]+]] --// CHECK-DEBUG-NEXT: store ptr [[I16]], ptr [[TMP2]], align 8, !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[AGG_CAPTURED18]], i32 0, i32 0, !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I16]], align 4, !dbg [[DBG195:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.7(ptr [[DOTCOUNT_ADDR19]], ptr [[AGG_CAPTURED17]]), !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: [[DOTCOUNT20:%.*]] = load i32, ptr [[DOTCOUNT_ADDR19]], align 4, !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER21:%.*]], !dbg [[DBG194]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I16]], metadata [[META187:![0-9]+]], metadata !DIExpression()), !dbg [[DBG192:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[I16]], align 4, !dbg [[DBG192]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED17]], i32 0, i32 0, !dbg [[DBG193:![0-9]+]] -+// CHECK-DEBUG-NEXT: store ptr [[I16]], ptr [[TMP2]], align 8, !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[AGG_CAPTURED18]], i32 0, i32 0, !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I16]], align 4, !dbg [[DBG194:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.7(ptr [[DOTCOUNT_ADDR19]], ptr [[AGG_CAPTURED17]]), !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: [[DOTCOUNT20:%.*]] = load i32, ptr [[DOTCOUNT_ADDR19]], align 4, !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER21:%.*]], !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I16]], [[META187:![0-9]+]], !DIExpression(), [[META192:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[I16]], align 4, !dbg [[META192]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED17]], i32 0, i32 0, !dbg [[DBG193:![0-9]+]] +-// CHECK-DEBUG-NEXT: store ptr [[I16]], ptr [[TMP2]], align 8, !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_6]], ptr [[AGG_CAPTURED18]], i32 0, i32 0, !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I16]], align 4, !dbg [[DBG194:![0-9]+]] +-// CHECK-DEBUG-NEXT: 
store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.7(ptr [[DOTCOUNT_ADDR19]], ptr [[AGG_CAPTURED17]]), !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: [[DOTCOUNT20:%.*]] = load i32, ptr [[DOTCOUNT_ADDR19]], align 4, !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER21:%.*]], !dbg [[DBG193]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I16]], [[META188:![0-9]+]], !DIExpression(), [[META193:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[I16]], align 4, !dbg [[META193]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED17]], i32 0, i32 0, !dbg [[DBG194:![0-9]+]] ++// CHECK-DEBUG-NEXT: store ptr [[I16]], ptr [[TMP2]], align 8, !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_6]], ptr [[AGG_CAPTURED18]], i32 0, i32 0, !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I16]], align 4, !dbg [[DBG195:![0-9]+]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.7(ptr [[DOTCOUNT_ADDR19]], ptr [[AGG_CAPTURED17]]), !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: [[DOTCOUNT20:%.*]] = load i32, ptr [[DOTCOUNT_ADDR19]], align 4, !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER21:%.*]], !dbg [[DBG194]] // CHECK-DEBUG: omp_loop.preheader21: --// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND35]], align 4, !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT20]], 1, !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND36]], align 4, !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE37]], align 4, !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM38:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20:[0-9]+]]), !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB20]], i32 [[OMP_GLOBAL_THREAD_NUM38]], i32 34, ptr [[P_LASTITER34]], ptr [[P_LOWERBOUND35]], ptr [[P_UPPERBOUND36]], ptr [[P_STRIDE37]], i32 1, i32 0), !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND35]], align 4, !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND36]], align 4, !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER22:%.*]], !dbg [[DBG194]] -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND35]], align 4, !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT20]], 1, !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND36]], align 4, !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE37]], align 4, !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM38:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20:[0-9]+]]), !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB20]], i32 [[OMP_GLOBAL_THREAD_NUM38]], i32 34, ptr [[P_LASTITER34]], ptr [[P_LOWERBOUND35]], ptr [[P_UPPERBOUND36]], ptr [[P_STRIDE37]], i32 1, i32 0), !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND35]], align 4, !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND36]], align 4, !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS139:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg 
[[DBG193]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[TRIP_COUNT_MINUS139]], 1, !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER22:%.*]], !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND35]], align 4, !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT20]], 1, !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND36]], align 4, !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE37]], align 4, !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM38:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20:[0-9]+]]), !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB20]], i32 [[OMP_GLOBAL_THREAD_NUM38]], i32 34, ptr [[P_LASTITER34]], ptr [[P_LOWERBOUND35]], ptr [[P_UPPERBOUND36]], ptr [[P_STRIDE37]], i32 1, i32 0), !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND35]], align 4, !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND36]], align 4, !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER22:%.*]], !dbg [[DBG193]] ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND35]], align 4, !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT20]], 1, !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND36]], align 4, !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE37]], align 4, !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM38:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20:[0-9]+]]), !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB20]], i32 [[OMP_GLOBAL_THREAD_NUM38]], i32 34, ptr [[P_LASTITER34]], ptr [[P_LOWERBOUND35]], ptr [[P_UPPERBOUND36]], ptr [[P_STRIDE37]], i32 1, i32 0), !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND35]], align 4, !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND36]], align 4, !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS139:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[TRIP_COUNT_MINUS139]], 1, !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER22:%.*]], !dbg [[DBG194]] // CHECK-DEBUG: omp_loop.header22: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV28:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER21]] ], [ [[OMP_LOOP_NEXT30:%.*]], [[OMP_LOOP_INC25:%.*]] ], !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND23:%.*]], !dbg [[DBG194]] -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV28:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER21]] ], [ [[OMP_LOOP_NEXT30:%.*]], [[OMP_LOOP_INC25:%.*]] ], !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND23:%.*]], !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV28:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER21]] ], [ [[OMP_LOOP_NEXT30:%.*]], [[OMP_LOOP_INC25:%.*]] ], !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND23:%.*]], !dbg [[DBG193]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV28:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER21]] ], [ [[OMP_LOOP_NEXT30:%.*]], [[OMP_LOOP_INC25:%.*]] ], !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND23:%.*]], !dbg [[DBG194]] // CHECK-DEBUG: omp_loop.cond23: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP29:%.*]] = icmp ult i32 [[OMP_LOOP_IV28]], 
[[TMP9]], !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP29]], label [[OMP_LOOP_BODY24:%.*]], label [[OMP_LOOP_EXIT26:%.*]], !dbg [[DBG194]] -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP29:%.*]] = icmp ult i32 [[OMP_LOOP_IV28]], [[TMP8]], !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP29]], label [[OMP_LOOP_BODY24:%.*]], label [[OMP_LOOP_EXIT26:%.*]], !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP29:%.*]] = icmp ult i32 [[OMP_LOOP_IV28]], [[TMP9]], !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP29]], label [[OMP_LOOP_BODY24:%.*]], label [[OMP_LOOP_EXIT26:%.*]], !dbg [[DBG193]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP29:%.*]] = icmp ult i32 [[OMP_LOOP_IV28]], [[TMP8]], !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP29]], label [[OMP_LOOP_BODY24:%.*]], label [[OMP_LOOP_EXIT26:%.*]], !dbg [[DBG194]] // CHECK-DEBUG: omp_loop.exit26: --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB20]], i32 [[OMP_GLOBAL_THREAD_NUM38]]), !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM39:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20]]), !dbg [[DBG196:![0-9]+]] --// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB21:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM39]]), !dbg [[DBG196]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER27:%.*]], !dbg [[DBG194]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB20]], i32 [[OMP_GLOBAL_THREAD_NUM38]]), !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM40:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20]]), !dbg [[DBG195:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB21:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM40]]), !dbg [[DBG195]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER27:%.*]], !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB20]], i32 [[OMP_GLOBAL_THREAD_NUM38]]), !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM39:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20]]), !dbg [[DBG195:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB21:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM39]]), !dbg [[DBG195]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER27:%.*]], !dbg [[DBG193]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB20]], i32 [[OMP_GLOBAL_THREAD_NUM38]]), !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM40:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20]]), !dbg [[DBG196:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB21:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM40]]), !dbg [[DBG196]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER27:%.*]], !dbg [[DBG194]] // CHECK-DEBUG: omp_loop.after27: --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM40:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB23:[0-9]+]]), !dbg [[DBG197:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB23:[0-9]+]]), !dbg [[DBG196:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM40:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB23:[0-9]+]]), !dbg [[DBG196:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB23:[0-9]+]]), !dbg [[DBG197:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR1:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 @@ -2071,47 +2119,47 @@ diff -Naur -x .git -x '*.pyc' 
llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: store ptr [[LOADGEP_B_ADDR]], ptr [[GEP_B_ADDR2]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR3:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2 // CHECK-DEBUG-NEXT: store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR3]], align 8 --// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB23]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG198:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB23]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG197:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB23]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG197:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB23]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG198:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] // CHECK-DEBUG: omp.par.outlined.exit: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT46_SPLIT:%.*]] -// CHECK-DEBUG: omp.par.exit46.split: --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I75]], metadata [[META202:![0-9]+]], metadata !DIExpression()), !dbg [[DBG205:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 0, ptr [[I75]], align 4, !dbg [[DBG205]] --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[AGG_CAPTURED76]], i32 0, i32 0, !dbg [[DBG206:![0-9]+]] --// CHECK-DEBUG-NEXT: store ptr [[I75]], ptr [[TMP10]], align 8, !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[AGG_CAPTURED77]], i32 0, i32 0, !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load i32, ptr [[I75]], align 4, !dbg [[DBG207:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4, !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.11(ptr [[DOTCOUNT_ADDR78]], ptr [[AGG_CAPTURED76]]), !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: [[DOTCOUNT79:%.*]] = load i32, ptr [[DOTCOUNT_ADDR78]], align 4, !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER80:%.*]], !dbg [[DBG206]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I75]], [[META201:![0-9]+]], !DIExpression(), [[META204:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[I75]], align 4, !dbg [[META204]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_9]], ptr [[AGG_CAPTURED76]], i32 0, i32 0, !dbg [[DBG205:![0-9]+]] +-// CHECK-DEBUG-NEXT: store ptr [[I75]], ptr [[TMP10]], align 8, !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_10]], ptr [[AGG_CAPTURED77]], i32 0, i32 0, !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load i32, ptr [[I75]], align 4, !dbg [[DBG206:![0-9]+]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4, !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.11(ptr [[DOTCOUNT_ADDR78]], ptr [[AGG_CAPTURED76]]), !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: [[DOTCOUNT79:%.*]] = load i32, ptr [[DOTCOUNT_ADDR78]], align 4, !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER80:%.*]], !dbg [[DBG205]] -// CHECK-DEBUG: omp_loop.preheader80: --// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND94]], align 4, !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = sub i32 [[DOTCOUNT79]], 1, !dbg [[DBG206]] --// 
CHECK-DEBUG-NEXT: store i32 [[TMP13]], ptr [[P_UPPERBOUND95]], align 4, !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE96]], align 4, !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM97:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB28:[0-9]+]]), !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB28]], i32 [[OMP_GLOBAL_THREAD_NUM97]], i32 34, ptr [[P_LASTITER93]], ptr [[P_LOWERBOUND94]], ptr [[P_UPPERBOUND95]], ptr [[P_STRIDE96]], i32 1, i32 0), !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: [[TMP14:%.*]] = load i32, ptr [[P_LOWERBOUND94]], align 4, !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: [[TMP15:%.*]] = load i32, ptr [[P_UPPERBOUND95]], align 4, !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], [[TMP14]], !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], 1, !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER81:%.*]], !dbg [[DBG206]] +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND94]], align 4, !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = sub i32 [[DOTCOUNT79]], 1, !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP13]], ptr [[P_UPPERBOUND95]], align 4, !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE96]], align 4, !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM97:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB28:[0-9]+]]), !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB28]], i32 [[OMP_GLOBAL_THREAD_NUM97]], i32 34, ptr [[P_LASTITER93]], ptr [[P_LOWERBOUND94]], ptr [[P_UPPERBOUND95]], ptr [[P_STRIDE96]], i32 1, i32 0), !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: [[TMP14:%.*]] = load i32, ptr [[P_LOWERBOUND94]], align 4, !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: [[TMP15:%.*]] = load i32, ptr [[P_UPPERBOUND95]], align 4, !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], [[TMP14]], !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], 1, !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER81:%.*]], !dbg [[DBG205]] -// CHECK-DEBUG: omp_loop.header81: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV87:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER80]] ], [ [[OMP_LOOP_NEXT89:%.*]], [[OMP_LOOP_INC84:%.*]] ], !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND82:%.*]], !dbg [[DBG206]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV87:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER80]] ], [ [[OMP_LOOP_NEXT89:%.*]], [[OMP_LOOP_INC84:%.*]] ], !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND82:%.*]], !dbg [[DBG205]] -// CHECK-DEBUG: omp_loop.cond82: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP88:%.*]] = icmp ult i32 [[OMP_LOOP_IV87]], [[TMP17]], !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP88]], label [[OMP_LOOP_BODY83:%.*]], label [[OMP_LOOP_EXIT85:%.*]], !dbg [[DBG206]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP88:%.*]] = icmp ult i32 [[OMP_LOOP_IV87]], [[TMP17]], !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP88]], label [[OMP_LOOP_BODY83:%.*]], label [[OMP_LOOP_EXIT85:%.*]], !dbg [[DBG205]] -// CHECK-DEBUG: omp_loop.exit85: --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB28]], i32 [[OMP_GLOBAL_THREAD_NUM97]]), !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM98:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB28]]), !dbg [[DBG208:![0-9]+]] --// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB29:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM98]]), !dbg [[DBG208]] --// 
CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER86:%.*]], !dbg [[DBG206]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB28]], i32 [[OMP_GLOBAL_THREAD_NUM97]]), !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM98:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB28]]), !dbg [[DBG207:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB29:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM98]]), !dbg [[DBG207]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER86:%.*]], !dbg [[DBG205]] -// CHECK-DEBUG: omp_loop.after86: --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM99:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB31:[0-9]+]]), !dbg [[DBG209:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM99:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB31:[0-9]+]]), !dbg [[DBG208:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL213:%.*]] -// CHECK-DEBUG: omp_parallel213: -// CHECK-DEBUG-NEXT: [[GEP_A_ADDR210:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG209]], i32 0, i32 0 @@ -2120,83 +2168,83 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest -// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_B_ADDR]], ptr [[GEP_B_ADDR211]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_R_ADDR212:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG209]], i32 0, i32 2 -// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR212]], align 8 --// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB31]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.21, ptr [[STRUCTARG209]]), !dbg [[DBG210:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB31]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.21, ptr [[STRUCTARG209]]), !dbg [[DBG209:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT134:%.*]] -// CHECK-DEBUG: omp.par.outlined.exit134: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT105_SPLIT:%.*]] -// CHECK-DEBUG: omp.par.exit105.split: --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I135]], metadata [[META214:![0-9]+]], metadata !DIExpression()), !dbg [[DBG217:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 0, ptr [[I135]], align 4, !dbg [[DBG217]] --// CHECK-DEBUG-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[AGG_CAPTURED136]], i32 0, i32 0, !dbg [[DBG218:![0-9]+]] --// CHECK-DEBUG-NEXT: store ptr [[I135]], ptr [[TMP18]], align 8, !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[AGG_CAPTURED137]], i32 0, i32 0, !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: [[TMP20:%.*]] = load i32, ptr [[I135]], align 4, !dbg [[DBG219:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4, !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.15(ptr [[DOTCOUNT_ADDR138]], ptr [[AGG_CAPTURED136]]), !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: [[DOTCOUNT139:%.*]] = load i32, ptr [[DOTCOUNT_ADDR138]], align 4, !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER140:%.*]], !dbg [[DBG218]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I135]], [[META213:![0-9]+]], !DIExpression(), [[META216:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[I135]], align 4, !dbg [[META216]] +-// CHECK-DEBUG-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_13]], ptr [[AGG_CAPTURED136]], i32 0, i32 0, !dbg [[DBG217:![0-9]+]] +-// CHECK-DEBUG-NEXT: store ptr [[I135]], ptr [[TMP18]], align 8, !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: [[TMP19:%.*]] = 
getelementptr inbounds nuw [[STRUCT_ANON_14]], ptr [[AGG_CAPTURED137]], i32 0, i32 0, !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: [[TMP20:%.*]] = load i32, ptr [[I135]], align 4, !dbg [[DBG218:![0-9]+]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4, !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.15(ptr [[DOTCOUNT_ADDR138]], ptr [[AGG_CAPTURED136]]), !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: [[DOTCOUNT139:%.*]] = load i32, ptr [[DOTCOUNT_ADDR138]], align 4, !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER140:%.*]], !dbg [[DBG217]] -// CHECK-DEBUG: omp_loop.preheader140: --// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND154]], align 4, !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: [[TMP21:%.*]] = sub i32 [[DOTCOUNT139]], 1, !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: store i32 [[TMP21]], ptr [[P_UPPERBOUND155]], align 4, !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE156]], align 4, !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM157:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB36:[0-9]+]]), !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM157]], i32 34, ptr [[P_LASTITER153]], ptr [[P_LOWERBOUND154]], ptr [[P_UPPERBOUND155]], ptr [[P_STRIDE156]], i32 1, i32 0), !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: [[TMP22:%.*]] = load i32, ptr [[P_LOWERBOUND154]], align 4, !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: [[TMP23:%.*]] = load i32, ptr [[P_UPPERBOUND155]], align 4, !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], [[TMP22]], !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: [[TMP25:%.*]] = add i32 [[TMP24]], 1, !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER141:%.*]], !dbg [[DBG218]] +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND154]], align 4, !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: [[TMP21:%.*]] = sub i32 [[DOTCOUNT139]], 1, !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP21]], ptr [[P_UPPERBOUND155]], align 4, !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE156]], align 4, !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM157:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB36:[0-9]+]]), !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM157]], i32 34, ptr [[P_LASTITER153]], ptr [[P_LOWERBOUND154]], ptr [[P_UPPERBOUND155]], ptr [[P_STRIDE156]], i32 1, i32 0), !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: [[TMP22:%.*]] = load i32, ptr [[P_LOWERBOUND154]], align 4, !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: [[TMP23:%.*]] = load i32, ptr [[P_UPPERBOUND155]], align 4, !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], [[TMP22]], !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: [[TMP25:%.*]] = add i32 [[TMP24]], 1, !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER141:%.*]], !dbg [[DBG217]] -// CHECK-DEBUG: omp_loop.header141: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV147:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER140]] ], [ [[OMP_LOOP_NEXT149:%.*]], [[OMP_LOOP_INC144:%.*]] ], !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND142:%.*]], !dbg [[DBG218]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV147:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER140]] ], [ [[OMP_LOOP_NEXT149:%.*]], [[OMP_LOOP_INC144:%.*]] ], !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND142:%.*]], !dbg [[DBG217]] -// CHECK-DEBUG: omp_loop.cond142: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP148:%.*]] = icmp ult i32 
[[OMP_LOOP_IV147]], [[TMP25]], !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP148]], label [[OMP_LOOP_BODY143:%.*]], label [[OMP_LOOP_EXIT145:%.*]], !dbg [[DBG218]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP148:%.*]] = icmp ult i32 [[OMP_LOOP_IV147]], [[TMP25]], !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP148]], label [[OMP_LOOP_BODY143:%.*]], label [[OMP_LOOP_EXIT145:%.*]], !dbg [[DBG217]] -// CHECK-DEBUG: omp_loop.exit145: --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM157]]), !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM158:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB36]]), !dbg [[DBG220:![0-9]+]] --// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB37:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM158]]), !dbg [[DBG220]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER146:%.*]], !dbg [[DBG218]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM157]]), !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM158:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB36]]), !dbg [[DBG219:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB37:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM158]]), !dbg [[DBG219]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER146:%.*]], !dbg [[DBG217]] -// CHECK-DEBUG: omp_loop.after146: --// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION9_PARALLEL_AFTER:%.*]], !dbg [[DBG221:![0-9]+]] +-// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION9_PARALLEL_AFTER:%.*]], !dbg [[DBG220:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT47_SPLIT:%.*]] +// CHECK-DEBUG: omp.par.exit47.split: -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I77]], metadata [[META201:![0-9]+]], metadata !DIExpression()), !dbg [[DBG204:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[I77]], align 4, !dbg [[DBG204]] -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[AGG_CAPTURED78]], i32 0, i32 0, !dbg [[DBG205:![0-9]+]] -+// CHECK-DEBUG-NEXT: store ptr [[I77]], ptr [[TMP9]], align 8, !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[AGG_CAPTURED79]], i32 0, i32 0, !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[I77]], align 4, !dbg [[DBG206:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4, !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.11(ptr [[DOTCOUNT_ADDR80]], ptr [[AGG_CAPTURED78]]), !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: [[DOTCOUNT81:%.*]] = load i32, ptr [[DOTCOUNT_ADDR80]], align 4, !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER82:%.*]], !dbg [[DBG205]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I77]], [[META202:![0-9]+]], !DIExpression(), [[META205:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[I77]], align 4, !dbg [[META205]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_9]], ptr [[AGG_CAPTURED78]], i32 0, i32 0, !dbg [[DBG206:![0-9]+]] ++// CHECK-DEBUG-NEXT: store ptr [[I77]], ptr [[TMP9]], align 8, !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_10]], ptr [[AGG_CAPTURED79]], i32 0, i32 0, !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[I77]], align 4, !dbg [[DBG207:![0-9]+]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4, !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.11(ptr 
[[DOTCOUNT_ADDR80]], ptr [[AGG_CAPTURED78]]), !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: [[DOTCOUNT81:%.*]] = load i32, ptr [[DOTCOUNT_ADDR80]], align 4, !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER82:%.*]], !dbg [[DBG206]] +// CHECK-DEBUG: omp_loop.preheader82: -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND96]], align 4, !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = sub i32 [[DOTCOUNT81]], 1, !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP12]], ptr [[P_UPPERBOUND97]], align 4, !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE98]], align 4, !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM99:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB28:[0-9]+]]), !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB28]], i32 [[OMP_GLOBAL_THREAD_NUM99]], i32 34, ptr [[P_LASTITER95]], ptr [[P_LOWERBOUND96]], ptr [[P_UPPERBOUND97]], ptr [[P_STRIDE98]], i32 1, i32 0), !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load i32, ptr [[P_LOWERBOUND96]], align 4, !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: [[TMP14:%.*]] = load i32, ptr [[P_UPPERBOUND97]], align 4, !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS1100:%.*]] = sub i32 [[TMP14]], [[TMP13]], !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: [[TMP15:%.*]] = add i32 [[TRIP_COUNT_MINUS1100]], 1, !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER83:%.*]], !dbg [[DBG205]] ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND96]], align 4, !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = sub i32 [[DOTCOUNT81]], 1, !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP12]], ptr [[P_UPPERBOUND97]], align 4, !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE98]], align 4, !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM99:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB28:[0-9]+]]), !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB28]], i32 [[OMP_GLOBAL_THREAD_NUM99]], i32 34, ptr [[P_LASTITER95]], ptr [[P_LOWERBOUND96]], ptr [[P_UPPERBOUND97]], ptr [[P_STRIDE98]], i32 1, i32 0), !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load i32, ptr [[P_LOWERBOUND96]], align 4, !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: [[TMP14:%.*]] = load i32, ptr [[P_UPPERBOUND97]], align 4, !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS1100:%.*]] = sub i32 [[TMP14]], [[TMP13]], !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: [[TMP15:%.*]] = add i32 [[TRIP_COUNT_MINUS1100]], 1, !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER83:%.*]], !dbg [[DBG206]] +// CHECK-DEBUG: omp_loop.header83: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV89:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER82]] ], [ [[OMP_LOOP_NEXT91:%.*]], [[OMP_LOOP_INC86:%.*]] ], !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND84:%.*]], !dbg [[DBG205]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV89:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER82]] ], [ [[OMP_LOOP_NEXT91:%.*]], [[OMP_LOOP_INC86:%.*]] ], !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND84:%.*]], !dbg [[DBG206]] +// CHECK-DEBUG: omp_loop.cond84: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP90:%.*]] = icmp ult i32 [[OMP_LOOP_IV89]], [[TMP15]], !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP90]], label [[OMP_LOOP_BODY85:%.*]], label [[OMP_LOOP_EXIT87:%.*]], !dbg [[DBG205]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP90:%.*]] = icmp ult i32 [[OMP_LOOP_IV89]], [[TMP15]], !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP90]], 
label [[OMP_LOOP_BODY85:%.*]], label [[OMP_LOOP_EXIT87:%.*]], !dbg [[DBG206]] +// CHECK-DEBUG: omp_loop.exit87: -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB28]], i32 [[OMP_GLOBAL_THREAD_NUM99]]), !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM101:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB28]]), !dbg [[DBG207:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB29:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM101]]), !dbg [[DBG207]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER88:%.*]], !dbg [[DBG205]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB28]], i32 [[OMP_GLOBAL_THREAD_NUM99]]), !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM101:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB28]]), !dbg [[DBG208:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB29:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM101]]), !dbg [[DBG208]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER88:%.*]], !dbg [[DBG206]] +// CHECK-DEBUG: omp_loop.after88: -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM102:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB31:[0-9]+]]), !dbg [[DBG208:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM102:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB31:[0-9]+]]), !dbg [[DBG209:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL220:%.*]] +// CHECK-DEBUG: omp_parallel220: +// CHECK-DEBUG-NEXT: [[GEP_A_ADDR217:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG216]], i32 0, i32 0 @@ -2205,142 +2253,142 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest +// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_B_ADDR]], ptr [[GEP_B_ADDR218]], align 8 +// CHECK-DEBUG-NEXT: [[GEP_R_ADDR219:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG216]], i32 0, i32 2 +// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR219]], align 8 -+// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB31]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.21, ptr [[STRUCTARG216]]), !dbg [[DBG209:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB31]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.21, ptr [[STRUCTARG216]]), !dbg [[DBG210:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT138:%.*]] +// CHECK-DEBUG: omp.par.outlined.exit138: +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT108_SPLIT:%.*]] +// CHECK-DEBUG: omp.par.exit108.split: -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I139]], metadata [[META213:![0-9]+]], metadata !DIExpression()), !dbg [[DBG216:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[I139]], align 4, !dbg [[DBG216]] -+// CHECK-DEBUG-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[AGG_CAPTURED140]], i32 0, i32 0, !dbg [[DBG217:![0-9]+]] -+// CHECK-DEBUG-NEXT: store ptr [[I139]], ptr [[TMP16]], align 8, !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[AGG_CAPTURED141]], i32 0, i32 0, !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: [[TMP18:%.*]] = load i32, ptr [[I139]], align 4, !dbg [[DBG218:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4, !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.15(ptr [[DOTCOUNT_ADDR142]], ptr [[AGG_CAPTURED140]]), !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: [[DOTCOUNT143:%.*]] = load i32, ptr [[DOTCOUNT_ADDR142]], align 4, !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER144:%.*]], !dbg [[DBG217]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I139]], [[META214:![0-9]+]], !DIExpression(), [[META217:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[I139]], align 4, !dbg [[META217]] ++// CHECK-DEBUG-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_13]], ptr [[AGG_CAPTURED140]], i32 0, i32 0, !dbg [[DBG218:![0-9]+]] ++// CHECK-DEBUG-NEXT: store ptr [[I139]], ptr [[TMP16]], align 8, !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_14]], ptr [[AGG_CAPTURED141]], i32 0, i32 0, !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: [[TMP18:%.*]] = load i32, ptr [[I139]], align 4, !dbg [[DBG219:![0-9]+]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4, !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.15(ptr [[DOTCOUNT_ADDR142]], ptr [[AGG_CAPTURED140]]), !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: [[DOTCOUNT143:%.*]] = load i32, ptr [[DOTCOUNT_ADDR142]], align 4, !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER144:%.*]], !dbg [[DBG218]] +// CHECK-DEBUG: omp_loop.preheader144: -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND158]], align 4, !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: [[TMP19:%.*]] = sub i32 [[DOTCOUNT143]], 1, !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP19]], ptr [[P_UPPERBOUND159]], align 4, !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE160]], align 4, !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM161:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB36:[0-9]+]]), !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM161]], i32 34, ptr [[P_LASTITER157]], ptr [[P_LOWERBOUND158]], ptr [[P_UPPERBOUND159]], ptr [[P_STRIDE160]], i32 1, i32 0), !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: [[TMP20:%.*]] = load i32, ptr [[P_LOWERBOUND158]], align 4, !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: [[TMP21:%.*]] = load i32, ptr [[P_UPPERBOUND159]], align 4, !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS1162:%.*]] = sub i32 [[TMP21]], [[TMP20]], !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: 
[[TMP22:%.*]] = add i32 [[TRIP_COUNT_MINUS1162]], 1, !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER145:%.*]], !dbg [[DBG217]] ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND158]], align 4, !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: [[TMP19:%.*]] = sub i32 [[DOTCOUNT143]], 1, !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP19]], ptr [[P_UPPERBOUND159]], align 4, !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE160]], align 4, !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM161:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB36:[0-9]+]]), !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM161]], i32 34, ptr [[P_LASTITER157]], ptr [[P_LOWERBOUND158]], ptr [[P_UPPERBOUND159]], ptr [[P_STRIDE160]], i32 1, i32 0), !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: [[TMP20:%.*]] = load i32, ptr [[P_LOWERBOUND158]], align 4, !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: [[TMP21:%.*]] = load i32, ptr [[P_UPPERBOUND159]], align 4, !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS1162:%.*]] = sub i32 [[TMP21]], [[TMP20]], !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: [[TMP22:%.*]] = add i32 [[TRIP_COUNT_MINUS1162]], 1, !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER145:%.*]], !dbg [[DBG218]] +// CHECK-DEBUG: omp_loop.header145: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV151:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER144]] ], [ [[OMP_LOOP_NEXT153:%.*]], [[OMP_LOOP_INC148:%.*]] ], !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND146:%.*]], !dbg [[DBG217]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV151:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER144]] ], [ [[OMP_LOOP_NEXT153:%.*]], [[OMP_LOOP_INC148:%.*]] ], !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND146:%.*]], !dbg [[DBG218]] +// CHECK-DEBUG: omp_loop.cond146: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP152:%.*]] = icmp ult i32 [[OMP_LOOP_IV151]], [[TMP22]], !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP152]], label [[OMP_LOOP_BODY147:%.*]], label [[OMP_LOOP_EXIT149:%.*]], !dbg [[DBG217]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP152:%.*]] = icmp ult i32 [[OMP_LOOP_IV151]], [[TMP22]], !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP152]], label [[OMP_LOOP_BODY147:%.*]], label [[OMP_LOOP_EXIT149:%.*]], !dbg [[DBG218]] +// CHECK-DEBUG: omp_loop.exit149: -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM161]]), !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM163:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB36]]), !dbg [[DBG219:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB37:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM163]]), !dbg [[DBG219]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER150:%.*]], !dbg [[DBG217]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM161]]), !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM163:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB36]]), !dbg [[DBG220:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB37:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM163]]), !dbg [[DBG220]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER150:%.*]], !dbg [[DBG218]] +// CHECK-DEBUG: omp_loop.after150: -+// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION9_PARALLEL_AFTER:%.*]], !dbg [[DBG220:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION9_PARALLEL_AFTER:%.*]], !dbg [[DBG221:![0-9]+]] // CHECK-DEBUG: 
omp.par.region9.parallel.after: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE10:%.*]] // CHECK-DEBUG: omp.par.pre_finalize10: --// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT159_EXITSTUB:%.*]], !dbg [[DBG221]] +-// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT159_EXITSTUB:%.*]], !dbg [[DBG220]] -// CHECK-DEBUG: omp_loop.body143: --// CHECK-DEBUG-NEXT: [[TMP26:%.*]] = add i32 [[OMP_LOOP_IV147]], [[TMP22]], !dbg [[DBG220]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.16(ptr [[I135]], i32 [[TMP26]], ptr [[AGG_CAPTURED137]]), !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: [[TMP27:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG222:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV150:%.*]] = sitofp i32 [[TMP27]] to double, !dbg [[DBG222]] --// CHECK-DEBUG-NEXT: [[TMP28:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG220]] --// CHECK-DEBUG-NEXT: [[ADD151:%.*]] = fadd double [[CONV150]], [[TMP28]], !dbg [[DBG223:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV152:%.*]] = fptrunc double [[ADD151]] to float, !dbg [[DBG222]] --// CHECK-DEBUG-NEXT: [[TMP29:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG224:![0-9]+]] --// CHECK-DEBUG-NEXT: store float [[CONV152]], ptr [[TMP29]], align 4, !dbg [[DBG225:![0-9]+]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC144]], !dbg [[DBG218]] +-// CHECK-DEBUG-NEXT: [[TMP26:%.*]] = add i32 [[OMP_LOOP_IV147]], [[TMP22]], !dbg [[DBG219]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.16(ptr [[I135]], i32 [[TMP26]], ptr [[AGG_CAPTURED137]]), !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: [[TMP27:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG221:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV150:%.*]] = sitofp i32 [[TMP27]] to double, !dbg [[DBG221]] +-// CHECK-DEBUG-NEXT: [[TMP28:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG219]] +-// CHECK-DEBUG-NEXT: [[ADD151:%.*]] = fadd double [[CONV150]], [[TMP28]], !dbg [[DBG222:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV152:%.*]] = fptrunc double [[ADD151]] to float, !dbg [[DBG221]] +-// CHECK-DEBUG-NEXT: [[TMP29:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG223:![0-9]+]] +-// CHECK-DEBUG-NEXT: store float [[CONV152]], ptr [[TMP29]], align 4, !dbg [[DBG224:![0-9]+]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC144]], !dbg [[DBG217]] -// CHECK-DEBUG: omp_loop.inc144: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT149]] = add nuw i32 [[OMP_LOOP_IV147]], 1, !dbg [[DBG218]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER141]], !dbg [[DBG218]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT149]] = add nuw i32 [[OMP_LOOP_IV147]], 1, !dbg [[DBG217]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER141]], !dbg [[DBG217]] -// CHECK-DEBUG: omp_loop.body83: --// CHECK-DEBUG-NEXT: [[TMP30:%.*]] = add i32 [[OMP_LOOP_IV87]], [[TMP14]], !dbg [[DBG208]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.12(ptr [[I75]], i32 [[TMP30]], ptr [[AGG_CAPTURED77]]), !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: [[TMP31:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG226:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV90:%.*]] = sitofp i32 [[TMP31]] to double, !dbg [[DBG226]] --// CHECK-DEBUG-NEXT: [[TMP32:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG208]] --// CHECK-DEBUG-NEXT: [[ADD91:%.*]] = fadd double [[CONV90]], [[TMP32]], !dbg [[DBG227:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV92:%.*]] = fptrunc double [[ADD91]] to float, !dbg [[DBG226]] --// CHECK-DEBUG-NEXT: [[TMP33:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG228:![0-9]+]] --// CHECK-DEBUG-NEXT: store float 
[[CONV92]], ptr [[TMP33]], align 4, !dbg [[DBG229:![0-9]+]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC84]], !dbg [[DBG206]] +-// CHECK-DEBUG-NEXT: [[TMP30:%.*]] = add i32 [[OMP_LOOP_IV87]], [[TMP14]], !dbg [[DBG207]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.12(ptr [[I75]], i32 [[TMP30]], ptr [[AGG_CAPTURED77]]), !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: [[TMP31:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG225:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV90:%.*]] = sitofp i32 [[TMP31]] to double, !dbg [[DBG225]] +-// CHECK-DEBUG-NEXT: [[TMP32:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG207]] +-// CHECK-DEBUG-NEXT: [[ADD91:%.*]] = fadd double [[CONV90]], [[TMP32]], !dbg [[DBG226:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV92:%.*]] = fptrunc double [[ADD91]] to float, !dbg [[DBG225]] +-// CHECK-DEBUG-NEXT: [[TMP33:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG227:![0-9]+]] +-// CHECK-DEBUG-NEXT: store float [[CONV92]], ptr [[TMP33]], align 4, !dbg [[DBG228:![0-9]+]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC84]], !dbg [[DBG205]] -// CHECK-DEBUG: omp_loop.inc84: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT89]] = add nuw i32 [[OMP_LOOP_IV87]], 1, !dbg [[DBG206]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER81]], !dbg [[DBG206]] -+// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT164_EXITSTUB:%.*]], !dbg [[DBG220]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT89]] = add nuw i32 [[OMP_LOOP_IV87]], 1, !dbg [[DBG205]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER81]], !dbg [[DBG205]] ++// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT164_EXITSTUB:%.*]], !dbg [[DBG221]] +// CHECK-DEBUG: omp_loop.body147: -+// CHECK-DEBUG-NEXT: [[TMP23:%.*]] = add i32 [[OMP_LOOP_IV151]], [[TMP20]], !dbg [[DBG219]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.16(ptr [[I139]], i32 [[TMP23]], ptr [[AGG_CAPTURED141]]), !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: [[TMP24:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG221:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV154:%.*]] = sitofp i32 [[TMP24]] to double, !dbg [[DBG221]] -+// CHECK-DEBUG-NEXT: [[TMP25:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG219]] -+// CHECK-DEBUG-NEXT: [[ADD155:%.*]] = fadd double [[CONV154]], [[TMP25]], !dbg [[DBG222:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV156:%.*]] = fptrunc double [[ADD155]] to float, !dbg [[DBG221]] -+// CHECK-DEBUG-NEXT: [[TMP26:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG223:![0-9]+]] -+// CHECK-DEBUG-NEXT: store float [[CONV156]], ptr [[TMP26]], align 4, !dbg [[DBG224:![0-9]+]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC148]], !dbg [[DBG217]] ++// CHECK-DEBUG-NEXT: [[TMP23:%.*]] = add i32 [[OMP_LOOP_IV151]], [[TMP20]], !dbg [[DBG220]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.16(ptr [[I139]], i32 [[TMP23]], ptr [[AGG_CAPTURED141]]), !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: [[TMP24:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG222:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV154:%.*]] = sitofp i32 [[TMP24]] to double, !dbg [[DBG222]] ++// CHECK-DEBUG-NEXT: [[TMP25:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG220]] ++// CHECK-DEBUG-NEXT: [[ADD155:%.*]] = fadd double [[CONV154]], [[TMP25]], !dbg [[DBG223:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV156:%.*]] = fptrunc double [[ADD155]] to float, !dbg [[DBG222]] ++// CHECK-DEBUG-NEXT: [[TMP26:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG224:![0-9]+]] ++// CHECK-DEBUG-NEXT: store float [[CONV156]], ptr [[TMP26]], align 4, !dbg 
[[DBG225:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC148]], !dbg [[DBG218]] +// CHECK-DEBUG: omp_loop.inc148: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT153]] = add nuw i32 [[OMP_LOOP_IV151]], 1, !dbg [[DBG217]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER145]], !dbg [[DBG217]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT153]] = add nuw i32 [[OMP_LOOP_IV151]], 1, !dbg [[DBG218]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER145]], !dbg [[DBG218]] +// CHECK-DEBUG: omp_loop.body85: -+// CHECK-DEBUG-NEXT: [[TMP27:%.*]] = add i32 [[OMP_LOOP_IV89]], [[TMP13]], !dbg [[DBG207]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.12(ptr [[I77]], i32 [[TMP27]], ptr [[AGG_CAPTURED79]]), !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: [[TMP28:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG225:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV92:%.*]] = sitofp i32 [[TMP28]] to double, !dbg [[DBG225]] -+// CHECK-DEBUG-NEXT: [[TMP29:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG207]] -+// CHECK-DEBUG-NEXT: [[ADD93:%.*]] = fadd double [[CONV92]], [[TMP29]], !dbg [[DBG226:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV94:%.*]] = fptrunc double [[ADD93]] to float, !dbg [[DBG225]] -+// CHECK-DEBUG-NEXT: [[TMP30:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG227:![0-9]+]] -+// CHECK-DEBUG-NEXT: store float [[CONV94]], ptr [[TMP30]], align 4, !dbg [[DBG228:![0-9]+]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC86]], !dbg [[DBG205]] ++// CHECK-DEBUG-NEXT: [[TMP27:%.*]] = add i32 [[OMP_LOOP_IV89]], [[TMP13]], !dbg [[DBG208]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.12(ptr [[I77]], i32 [[TMP27]], ptr [[AGG_CAPTURED79]]), !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: [[TMP28:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG226:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV92:%.*]] = sitofp i32 [[TMP28]] to double, !dbg [[DBG226]] ++// CHECK-DEBUG-NEXT: [[TMP29:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG208]] ++// CHECK-DEBUG-NEXT: [[ADD93:%.*]] = fadd double [[CONV92]], [[TMP29]], !dbg [[DBG227:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV94:%.*]] = fptrunc double [[ADD93]] to float, !dbg [[DBG226]] ++// CHECK-DEBUG-NEXT: [[TMP30:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG228:![0-9]+]] ++// CHECK-DEBUG-NEXT: store float [[CONV94]], ptr [[TMP30]], align 4, !dbg [[DBG229:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC86]], !dbg [[DBG206]] +// CHECK-DEBUG: omp_loop.inc86: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT91]] = add nuw i32 [[OMP_LOOP_IV89]], 1, !dbg [[DBG205]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER83]], !dbg [[DBG205]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT91]] = add nuw i32 [[OMP_LOOP_IV89]], 1, !dbg [[DBG206]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER83]], !dbg [[DBG206]] // CHECK-DEBUG: omp_loop.body24: --// CHECK-DEBUG-NEXT: [[TMP34:%.*]] = add i32 [[OMP_LOOP_IV28]], [[TMP6]], !dbg [[DBG196]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.8(ptr [[I16]], i32 [[TMP34]], ptr [[AGG_CAPTURED18]]), !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: [[TMP35:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG230:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV31:%.*]] = sitofp i32 [[TMP35]] to double, !dbg [[DBG230]] --// CHECK-DEBUG-NEXT: [[TMP36:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG196]] --// CHECK-DEBUG-NEXT: [[ADD32:%.*]] = fadd double [[CONV31]], [[TMP36]], !dbg [[DBG231:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV33:%.*]] = fptrunc double [[ADD32]] to float, !dbg [[DBG230]] --// CHECK-DEBUG-NEXT: 
[[TMP37:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG232:![0-9]+]] --// CHECK-DEBUG-NEXT: store float [[CONV33]], ptr [[TMP37]], align 4, !dbg [[DBG233:![0-9]+]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC25]], !dbg [[DBG194]] -+// CHECK-DEBUG-NEXT: [[TMP31:%.*]] = add i32 [[OMP_LOOP_IV28]], [[TMP6]], !dbg [[DBG195]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.8(ptr [[I16]], i32 [[TMP31]], ptr [[AGG_CAPTURED18]]), !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: [[TMP32:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG229:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV31:%.*]] = sitofp i32 [[TMP32]] to double, !dbg [[DBG229]] -+// CHECK-DEBUG-NEXT: [[TMP33:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG195]] -+// CHECK-DEBUG-NEXT: [[ADD32:%.*]] = fadd double [[CONV31]], [[TMP33]], !dbg [[DBG230:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV33:%.*]] = fptrunc double [[ADD32]] to float, !dbg [[DBG229]] -+// CHECK-DEBUG-NEXT: [[TMP34:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG231:![0-9]+]] -+// CHECK-DEBUG-NEXT: store float [[CONV33]], ptr [[TMP34]], align 4, !dbg [[DBG232:![0-9]+]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC25]], !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: [[TMP34:%.*]] = add i32 [[OMP_LOOP_IV28]], [[TMP6]], !dbg [[DBG195]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.8(ptr [[I16]], i32 [[TMP34]], ptr [[AGG_CAPTURED18]]), !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: [[TMP35:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG229:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV31:%.*]] = sitofp i32 [[TMP35]] to double, !dbg [[DBG229]] +-// CHECK-DEBUG-NEXT: [[TMP36:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG195]] +-// CHECK-DEBUG-NEXT: [[ADD32:%.*]] = fadd double [[CONV31]], [[TMP36]], !dbg [[DBG230:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV33:%.*]] = fptrunc double [[ADD32]] to float, !dbg [[DBG229]] +-// CHECK-DEBUG-NEXT: [[TMP37:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG231:![0-9]+]] +-// CHECK-DEBUG-NEXT: store float [[CONV33]], ptr [[TMP37]], align 4, !dbg [[DBG232:![0-9]+]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC25]], !dbg [[DBG193]] ++// CHECK-DEBUG-NEXT: [[TMP31:%.*]] = add i32 [[OMP_LOOP_IV28]], [[TMP6]], !dbg [[DBG196]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.8(ptr [[I16]], i32 [[TMP31]], ptr [[AGG_CAPTURED18]]), !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: [[TMP32:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG230:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV31:%.*]] = sitofp i32 [[TMP32]] to double, !dbg [[DBG230]] ++// CHECK-DEBUG-NEXT: [[TMP33:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG196]] ++// CHECK-DEBUG-NEXT: [[ADD32:%.*]] = fadd double [[CONV31]], [[TMP33]], !dbg [[DBG231:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV33:%.*]] = fptrunc double [[ADD32]] to float, !dbg [[DBG230]] ++// CHECK-DEBUG-NEXT: [[TMP34:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG232:![0-9]+]] ++// CHECK-DEBUG-NEXT: store float [[CONV33]], ptr [[TMP34]], align 4, !dbg [[DBG233:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC25]], !dbg [[DBG194]] // CHECK-DEBUG: omp_loop.inc25: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT30]] = add nuw i32 [[OMP_LOOP_IV28]], 1, !dbg [[DBG194]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER22]], !dbg [[DBG194]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT30]] = add nuw i32 [[OMP_LOOP_IV28]], 1, !dbg [[DBG193]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER22]], !dbg [[DBG193]] -// CHECK-DEBUG: 
omp.par.outlined.exit159.exitStub: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT30]] = add nuw i32 [[OMP_LOOP_IV28]], 1, !dbg [[DBG193]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER22]], !dbg [[DBG193]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT30]] = add nuw i32 [[OMP_LOOP_IV28]], 1, !dbg [[DBG194]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER22]], !dbg [[DBG194]] +// CHECK-DEBUG: omp.par.outlined.exit164.exitStub: // CHECK-DEBUG-NEXT: ret void // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_2Pfid..omp_par.21 --// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR100:%.*]], ptr noalias [[ZERO_ADDR101:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG234:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR100:%.*]], ptr noalias [[ZERO_ADDR101:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG233:![0-9]+]] { -// CHECK-DEBUG-NEXT: omp.par.entry102: -+// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR103:%.*]], ptr noalias [[ZERO_ADDR104:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG233:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR103:%.*]], ptr noalias [[ZERO_ADDR104:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG234:![0-9]+]] { +// CHECK-DEBUG-NEXT: omp.par.entry105: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 // CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 @@ -2348,6 +2396,10 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 +-// CHECK-DEBUG-NEXT: [[P_LASTITER128:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_LOWERBOUND129:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_UPPERBOUND130:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_STRIDE131:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL106:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR100]], align 4 -// CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL106]], align 4 @@ -2356,66 +2408,66 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED111:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED112:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 -// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR113:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_LASTITER128:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_LOWERBOUND129:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_UPPERBOUND130:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_STRIDE131:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION103:%.*]] -// CHECK-DEBUG: omp.par.region103: --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I110]], metadata [[META235:![0-9]+]], metadata !DIExpression()), !dbg [[DBG241:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 0, ptr [[I110]], align 4, !dbg [[DBG241]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[AGG_CAPTURED111]], i32 0, i32 0, !dbg [[DBG242:![0-9]+]] --// CHECK-DEBUG-NEXT: store ptr [[I110]], ptr [[TMP2]], align 8, !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[AGG_CAPTURED112]], i32 0, i32 0, !dbg [[DBG242]] --// 
CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I110]], align 4, !dbg [[DBG243:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.13(ptr [[DOTCOUNT_ADDR113]], ptr [[AGG_CAPTURED111]]), !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: [[DOTCOUNT114:%.*]] = load i32, ptr [[DOTCOUNT_ADDR113]], align 4, !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER115:%.*]], !dbg [[DBG242]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I110]], [[META234:![0-9]+]], !DIExpression(), [[META240:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[I110]], align 4, !dbg [[META240]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_11]], ptr [[AGG_CAPTURED111]], i32 0, i32 0, !dbg [[DBG241:![0-9]+]] +-// CHECK-DEBUG-NEXT: store ptr [[I110]], ptr [[TMP2]], align 8, !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_12]], ptr [[AGG_CAPTURED112]], i32 0, i32 0, !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I110]], align 4, !dbg [[DBG242:![0-9]+]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.13(ptr [[DOTCOUNT_ADDR113]], ptr [[AGG_CAPTURED111]]), !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: [[DOTCOUNT114:%.*]] = load i32, ptr [[DOTCOUNT_ADDR113]], align 4, !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER115:%.*]], !dbg [[DBG241]] -// CHECK-DEBUG: omp_loop.preheader115: --// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND129]], align 4, !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT114]], 1, !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND130]], align 4, !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE131]], align 4, !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM132:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB33:[0-9]+]]), !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB33]], i32 [[OMP_GLOBAL_THREAD_NUM132]], i32 34, ptr [[P_LASTITER128]], ptr [[P_LOWERBOUND129]], ptr [[P_UPPERBOUND130]], ptr [[P_STRIDE131]], i32 1, i32 0), !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND129]], align 4, !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND130]], align 4, !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER116:%.*]], !dbg [[DBG242]] +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND129]], align 4, !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT114]], 1, !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND130]], align 4, !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE131]], align 4, !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM132:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB33:[0-9]+]]), !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB33]], i32 [[OMP_GLOBAL_THREAD_NUM132]], i32 34, ptr [[P_LASTITER128]], ptr [[P_LOWERBOUND129]], ptr [[P_UPPERBOUND130]], ptr [[P_STRIDE131]], i32 1, i32 0), !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND129]], align 4, !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND130]], align 4, !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER116:%.*]], !dbg [[DBG241]] -// CHECK-DEBUG: omp_loop.header116: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV122:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER115]] ], [ [[OMP_LOOP_NEXT124:%.*]], [[OMP_LOOP_INC119:%.*]] ], !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND117:%.*]], !dbg [[DBG242]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV122:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER115]] ], [ [[OMP_LOOP_NEXT124:%.*]], [[OMP_LOOP_INC119:%.*]] ], !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND117:%.*]], !dbg [[DBG241]] -// CHECK-DEBUG: omp_loop.cond117: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP123:%.*]] = icmp ult i32 [[OMP_LOOP_IV122]], [[TMP9]], !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP123]], label [[OMP_LOOP_BODY118:%.*]], label [[OMP_LOOP_EXIT120:%.*]], !dbg [[DBG242]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP123:%.*]] = icmp ult i32 [[OMP_LOOP_IV122]], [[TMP9]], !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP123]], label [[OMP_LOOP_BODY118:%.*]], label [[OMP_LOOP_EXIT120:%.*]], !dbg [[DBG241]] -// CHECK-DEBUG: omp_loop.exit120: --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB33]], i32 [[OMP_GLOBAL_THREAD_NUM132]]), !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM133:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB33]]), !dbg [[DBG244:![0-9]+]] --// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB34:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM133]]), !dbg [[DBG244]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER121:%.*]], !dbg [[DBG242]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB33]], i32 [[OMP_GLOBAL_THREAD_NUM132]]), !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM133:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB33]]), !dbg [[DBG243:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB34:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM133]]), !dbg [[DBG243]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER121:%.*]], !dbg [[DBG241]] -// CHECK-DEBUG: omp_loop.after121: --// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION103_PARALLEL_AFTER:%.*]], !dbg [[DBG245:![0-9]+]] +-// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION103_PARALLEL_AFTER:%.*]], !dbg [[DBG244:![0-9]+]] -// CHECK-DEBUG: omp.par.region103.parallel.after: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE104:%.*]] -// CHECK-DEBUG: omp.par.pre_finalize104: --// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT134_EXITSTUB:%.*]], !dbg [[DBG245]] +-// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT134_EXITSTUB:%.*]], !dbg [[DBG244]] -// CHECK-DEBUG: omp_loop.body118: --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV122]], [[TMP6]], !dbg [[DBG244]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.14(ptr [[I110]], i32 [[TMP10]], ptr [[AGG_CAPTURED112]]), !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG246:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV125:%.*]] = sitofp i32 [[TMP11]] to double, !dbg [[DBG246]] --// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG244]] --// CHECK-DEBUG-NEXT: [[ADD126:%.*]] = fadd double [[CONV125]], [[TMP12]], !dbg [[DBG247:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV127:%.*]] = fptrunc double 
[[ADD126]] to float, !dbg [[DBG246]] --// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG248:![0-9]+]] --// CHECK-DEBUG-NEXT: store float [[CONV127]], ptr [[TMP13]], align 4, !dbg [[DBG249:![0-9]+]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC119]], !dbg [[DBG242]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV122]], [[TMP6]], !dbg [[DBG243]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.14(ptr [[I110]], i32 [[TMP10]], ptr [[AGG_CAPTURED112]]), !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG245:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV125:%.*]] = sitofp i32 [[TMP11]] to double, !dbg [[DBG245]] +-// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG243]] +-// CHECK-DEBUG-NEXT: [[ADD126:%.*]] = fadd double [[CONV125]], [[TMP12]], !dbg [[DBG246:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV127:%.*]] = fptrunc double [[ADD126]] to float, !dbg [[DBG245]] +-// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG247:![0-9]+]] +-// CHECK-DEBUG-NEXT: store float [[CONV127]], ptr [[TMP13]], align 4, !dbg [[DBG248:![0-9]+]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC119]], !dbg [[DBG241]] -// CHECK-DEBUG: omp_loop.inc119: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT124]] = add nuw i32 [[OMP_LOOP_IV122]], 1, !dbg [[DBG242]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER116]], !dbg [[DBG242]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT124]] = add nuw i32 [[OMP_LOOP_IV122]], 1, !dbg [[DBG241]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER116]], !dbg [[DBG241]] -// CHECK-DEBUG: omp.par.outlined.exit134.exitStub: ++// CHECK-DEBUG-NEXT: [[P_LASTITER131:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_LOWERBOUND132:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_UPPERBOUND133:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_STRIDE134:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL109:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR103]], align 4 +// CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL109]], align 4 @@ -2424,73 +2476,69 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED114:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED115:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 +// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR116:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_LASTITER131:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_LOWERBOUND132:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_UPPERBOUND133:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_STRIDE134:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION106:%.*]] +// CHECK-DEBUG: omp.par.region106: -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I113]], metadata [[META234:![0-9]+]], metadata !DIExpression()), !dbg [[DBG240:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[I113]], align 4, !dbg [[DBG240]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[AGG_CAPTURED114]], i32 0, i32 0, !dbg [[DBG241:![0-9]+]] -+// CHECK-DEBUG-NEXT: store ptr [[I113]], ptr [[TMP2]], align 8, !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[AGG_CAPTURED115]], i32 0, i32 0, !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, 
ptr [[I113]], align 4, !dbg [[DBG242:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.13(ptr [[DOTCOUNT_ADDR116]], ptr [[AGG_CAPTURED114]]), !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: [[DOTCOUNT117:%.*]] = load i32, ptr [[DOTCOUNT_ADDR116]], align 4, !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER118:%.*]], !dbg [[DBG241]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I113]], [[META235:![0-9]+]], !DIExpression(), [[META241:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[I113]], align 4, !dbg [[META241]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_11]], ptr [[AGG_CAPTURED114]], i32 0, i32 0, !dbg [[DBG242:![0-9]+]] ++// CHECK-DEBUG-NEXT: store ptr [[I113]], ptr [[TMP2]], align 8, !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_12]], ptr [[AGG_CAPTURED115]], i32 0, i32 0, !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I113]], align 4, !dbg [[DBG243:![0-9]+]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.13(ptr [[DOTCOUNT_ADDR116]], ptr [[AGG_CAPTURED114]]), !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: [[DOTCOUNT117:%.*]] = load i32, ptr [[DOTCOUNT_ADDR116]], align 4, !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER118:%.*]], !dbg [[DBG242]] +// CHECK-DEBUG: omp_loop.preheader118: -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND132]], align 4, !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT117]], 1, !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND133]], align 4, !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE134]], align 4, !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM135:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB33:[0-9]+]]), !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB33]], i32 [[OMP_GLOBAL_THREAD_NUM135]], i32 34, ptr [[P_LASTITER131]], ptr [[P_LOWERBOUND132]], ptr [[P_UPPERBOUND133]], ptr [[P_STRIDE134]], i32 1, i32 0), !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND132]], align 4, !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND133]], align 4, !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS1136:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[TRIP_COUNT_MINUS1136]], 1, !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER119:%.*]], !dbg [[DBG241]] ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND132]], align 4, !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT117]], 1, !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND133]], align 4, !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE134]], align 4, !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM135:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB33:[0-9]+]]), !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB33]], i32 [[OMP_GLOBAL_THREAD_NUM135]], i32 34, ptr [[P_LASTITER131]], ptr [[P_LOWERBOUND132]], ptr [[P_UPPERBOUND133]], ptr [[P_STRIDE134]], i32 1, i32 0), !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND132]], align 4, !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[P_UPPERBOUND133]], align 4, !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS1136:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[TRIP_COUNT_MINUS1136]], 1, !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER119:%.*]], !dbg [[DBG242]] +// CHECK-DEBUG: omp_loop.header119: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV125:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER118]] ], [ [[OMP_LOOP_NEXT127:%.*]], [[OMP_LOOP_INC122:%.*]] ], !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND120:%.*]], !dbg [[DBG241]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV125:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER118]] ], [ [[OMP_LOOP_NEXT127:%.*]], [[OMP_LOOP_INC122:%.*]] ], !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND120:%.*]], !dbg [[DBG242]] +// CHECK-DEBUG: omp_loop.cond120: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP126:%.*]] = icmp ult i32 [[OMP_LOOP_IV125]], [[TMP8]], !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP126]], label [[OMP_LOOP_BODY121:%.*]], label [[OMP_LOOP_EXIT123:%.*]], !dbg [[DBG241]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP126:%.*]] = icmp ult i32 [[OMP_LOOP_IV125]], [[TMP8]], !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP126]], label [[OMP_LOOP_BODY121:%.*]], label [[OMP_LOOP_EXIT123:%.*]], !dbg [[DBG242]] +// CHECK-DEBUG: omp_loop.exit123: -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB33]], i32 [[OMP_GLOBAL_THREAD_NUM135]]), !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM137:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB33]]), !dbg [[DBG243:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB34:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM137]]), !dbg [[DBG243]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER124:%.*]], !dbg [[DBG241]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB33]], i32 [[OMP_GLOBAL_THREAD_NUM135]]), !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM137:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB33]]), !dbg [[DBG244:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB34:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM137]]), !dbg [[DBG244]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER124:%.*]], !dbg [[DBG242]] +// CHECK-DEBUG: omp_loop.after124: -+// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION106_PARALLEL_AFTER:%.*]], !dbg [[DBG244:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION106_PARALLEL_AFTER:%.*]], !dbg [[DBG245:![0-9]+]] +// CHECK-DEBUG: omp.par.region106.parallel.after: +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE107:%.*]] +// CHECK-DEBUG: omp.par.pre_finalize107: -+// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT138_EXITSTUB:%.*]], !dbg [[DBG244]] ++// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT138_EXITSTUB:%.*]], !dbg [[DBG245]] +// CHECK-DEBUG: omp_loop.body121: -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[OMP_LOOP_IV125]], [[TMP6]], !dbg [[DBG243]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.14(ptr [[I113]], i32 [[TMP9]], ptr [[AGG_CAPTURED115]]), !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG245:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV128:%.*]] = sitofp i32 [[TMP10]] to double, !dbg [[DBG245]] -+// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG243]] -+// CHECK-DEBUG-NEXT: [[ADD129:%.*]] = fadd double [[CONV128]], [[TMP11]], !dbg [[DBG246:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV130:%.*]] = 
fptrunc double [[ADD129]] to float, !dbg [[DBG245]] -+// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG247:![0-9]+]] -+// CHECK-DEBUG-NEXT: store float [[CONV130]], ptr [[TMP12]], align 4, !dbg [[DBG248:![0-9]+]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC122]], !dbg [[DBG241]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[OMP_LOOP_IV125]], [[TMP6]], !dbg [[DBG244]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.14(ptr [[I113]], i32 [[TMP9]], ptr [[AGG_CAPTURED115]]), !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG246:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV128:%.*]] = sitofp i32 [[TMP10]] to double, !dbg [[DBG246]] ++// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG244]] ++// CHECK-DEBUG-NEXT: [[ADD129:%.*]] = fadd double [[CONV128]], [[TMP11]], !dbg [[DBG247:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV130:%.*]] = fptrunc double [[ADD129]] to float, !dbg [[DBG246]] ++// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG248:![0-9]+]] ++// CHECK-DEBUG-NEXT: store float [[CONV130]], ptr [[TMP12]], align 4, !dbg [[DBG249:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC122]], !dbg [[DBG242]] +// CHECK-DEBUG: omp_loop.inc122: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT127]] = add nuw i32 [[OMP_LOOP_IV125]], 1, !dbg [[DBG241]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER119]], !dbg [[DBG241]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT127]] = add nuw i32 [[OMP_LOOP_IV125]], 1, !dbg [[DBG242]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER119]], !dbg [[DBG242]] +// CHECK-DEBUG: omp.par.outlined.exit138.exitStub: // CHECK-DEBUG-NEXT: ret void // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_2Pfid..omp_par --// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR41:%.*]], ptr noalias [[ZERO_ADDR42:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG250:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR41:%.*]], ptr noalias [[ZERO_ADDR42:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG249:![0-9]+]] { -// CHECK-DEBUG-NEXT: omp.par.entry43: -+// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR42:%.*]], ptr noalias [[ZERO_ADDR43:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG249:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR42:%.*]], ptr noalias [[ZERO_ADDR43:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG250:![0-9]+]] { +// CHECK-DEBUG-NEXT: omp.par.entry44: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 // CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 @@ -2498,6 +2546,10 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 +-// CHECK-DEBUG-NEXT: [[P_LASTITER69:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_LOWERBOUND70:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_UPPERBOUND71:%.*]] = alloca i32, align 4 +-// CHECK-DEBUG-NEXT: [[P_STRIDE72:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL47:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR41]], align 4 -// CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL47]], align 4 @@ -2506,65 +2558,65 @@ 
diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED52:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED53:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 -// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR54:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_LASTITER69:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_LOWERBOUND70:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_UPPERBOUND71:%.*]] = alloca i32, align 4 --// CHECK-DEBUG-NEXT: [[P_STRIDE72:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION44:%.*]] -// CHECK-DEBUG: omp.par.region44: --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I51]], metadata [[META251:![0-9]+]], metadata !DIExpression()), !dbg [[DBG257:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 0, ptr [[I51]], align 4, !dbg [[DBG257]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[AGG_CAPTURED52]], i32 0, i32 0, !dbg [[DBG258:![0-9]+]] --// CHECK-DEBUG-NEXT: store ptr [[I51]], ptr [[TMP2]], align 8, !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED53]], i32 0, i32 0, !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I51]], align 4, !dbg [[DBG259:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.9(ptr [[DOTCOUNT_ADDR54]], ptr [[AGG_CAPTURED52]]), !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: [[DOTCOUNT55:%.*]] = load i32, ptr [[DOTCOUNT_ADDR54]], align 4, !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER56:%.*]], !dbg [[DBG258]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I51]], [[META250:![0-9]+]], !DIExpression(), [[META256:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[I51]], align 4, !dbg [[META256]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_7]], ptr [[AGG_CAPTURED52]], i32 0, i32 0, !dbg [[DBG257:![0-9]+]] +-// CHECK-DEBUG-NEXT: store ptr [[I51]], ptr [[TMP2]], align 8, !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED53]], i32 0, i32 0, !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I51]], align 4, !dbg [[DBG258:![0-9]+]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.9(ptr [[DOTCOUNT_ADDR54]], ptr [[AGG_CAPTURED52]]), !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: [[DOTCOUNT55:%.*]] = load i32, ptr [[DOTCOUNT_ADDR54]], align 4, !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER56:%.*]], !dbg [[DBG257]] -// CHECK-DEBUG: omp_loop.preheader56: --// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND70]], align 4, !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT55]], 1, !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND71]], align 4, !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE72]], align 4, !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM73:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB25:[0-9]+]]), !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB25]], i32 [[OMP_GLOBAL_THREAD_NUM73]], i32 34, ptr [[P_LASTITER69]], ptr [[P_LOWERBOUND70]], ptr [[P_UPPERBOUND71]], ptr [[P_STRIDE72]], i32 1, i32 0), !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load 
i32, ptr [[P_LOWERBOUND70]], align 4, !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND71]], align 4, !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER57:%.*]], !dbg [[DBG258]] +-// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND70]], align 4, !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT55]], 1, !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND71]], align 4, !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE72]], align 4, !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM73:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB25:[0-9]+]]), !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB25]], i32 [[OMP_GLOBAL_THREAD_NUM73]], i32 34, ptr [[P_LASTITER69]], ptr [[P_LOWERBOUND70]], ptr [[P_UPPERBOUND71]], ptr [[P_STRIDE72]], i32 1, i32 0), !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND70]], align 4, !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND71]], align 4, !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER57:%.*]], !dbg [[DBG257]] -// CHECK-DEBUG: omp_loop.header57: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV63:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER56]] ], [ [[OMP_LOOP_NEXT65:%.*]], [[OMP_LOOP_INC60:%.*]] ], !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND58:%.*]], !dbg [[DBG258]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV63:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER56]] ], [ [[OMP_LOOP_NEXT65:%.*]], [[OMP_LOOP_INC60:%.*]] ], !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND58:%.*]], !dbg [[DBG257]] -// CHECK-DEBUG: omp_loop.cond58: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP64:%.*]] = icmp ult i32 [[OMP_LOOP_IV63]], [[TMP9]], !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP64]], label [[OMP_LOOP_BODY59:%.*]], label [[OMP_LOOP_EXIT61:%.*]], !dbg [[DBG258]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP64:%.*]] = icmp ult i32 [[OMP_LOOP_IV63]], [[TMP9]], !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP64]], label [[OMP_LOOP_BODY59:%.*]], label [[OMP_LOOP_EXIT61:%.*]], !dbg [[DBG257]] -// CHECK-DEBUG: omp_loop.exit61: --// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB25]], i32 [[OMP_GLOBAL_THREAD_NUM73]]), !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM74:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB25]]), !dbg [[DBG260:![0-9]+]] --// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB26:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM74]]), !dbg [[DBG260]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER62:%.*]], !dbg [[DBG258]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB25]], i32 [[OMP_GLOBAL_THREAD_NUM73]]), !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM74:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB25]]), !dbg [[DBG259:![0-9]+]] +-// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB26:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM74]]), !dbg [[DBG259]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER62:%.*]], !dbg [[DBG257]] -// CHECK-DEBUG: omp_loop.after62: --// CHECK-DEBUG-NEXT: br label 
[[OMP_PAR_REGION44_PARALLEL_AFTER:%.*]], !dbg [[DBG261:![0-9]+]] +-// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION44_PARALLEL_AFTER:%.*]], !dbg [[DBG260:![0-9]+]] -// CHECK-DEBUG: omp.par.region44.parallel.after: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE45:%.*]] -// CHECK-DEBUG: omp.par.pre_finalize45: --// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG261]] +-// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG260]] -// CHECK-DEBUG: omp_loop.body59: --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV63]], [[TMP6]], !dbg [[DBG260]] --// CHECK-DEBUG-NEXT: call void @__captured_stmt.10(ptr [[I51]], i32 [[TMP10]], ptr [[AGG_CAPTURED53]]), !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG262:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV66:%.*]] = sitofp i32 [[TMP11]] to double, !dbg [[DBG262]] --// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG260]] --// CHECK-DEBUG-NEXT: [[ADD67:%.*]] = fadd double [[CONV66]], [[TMP12]], !dbg [[DBG263:![0-9]+]] --// CHECK-DEBUG-NEXT: [[CONV68:%.*]] = fptrunc double [[ADD67]] to float, !dbg [[DBG262]] --// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG264:![0-9]+]] --// CHECK-DEBUG-NEXT: store float [[CONV68]], ptr [[TMP13]], align 4, !dbg [[DBG265:![0-9]+]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC60]], !dbg [[DBG258]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV63]], [[TMP6]], !dbg [[DBG259]] +-// CHECK-DEBUG-NEXT: call void @__captured_stmt.10(ptr [[I51]], i32 [[TMP10]], ptr [[AGG_CAPTURED53]]), !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG261:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV66:%.*]] = sitofp i32 [[TMP11]] to double, !dbg [[DBG261]] +-// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG259]] +-// CHECK-DEBUG-NEXT: [[ADD67:%.*]] = fadd double [[CONV66]], [[TMP12]], !dbg [[DBG262:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[CONV68:%.*]] = fptrunc double [[ADD67]] to float, !dbg [[DBG261]] +-// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG263:![0-9]+]] +-// CHECK-DEBUG-NEXT: store float [[CONV68]], ptr [[TMP13]], align 4, !dbg [[DBG264:![0-9]+]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC60]], !dbg [[DBG257]] -// CHECK-DEBUG: omp_loop.inc60: --// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT65]] = add nuw i32 [[OMP_LOOP_IV63]], 1, !dbg [[DBG258]] --// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER57]], !dbg [[DBG258]] +-// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT65]] = add nuw i32 [[OMP_LOOP_IV63]], 1, !dbg [[DBG257]] +-// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER57]], !dbg [[DBG257]] ++// CHECK-DEBUG-NEXT: [[P_LASTITER70:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_LOWERBOUND71:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_UPPERBOUND72:%.*]] = alloca i32, align 4 ++// CHECK-DEBUG-NEXT: [[P_STRIDE73:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL48:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR42]], align 4 +// CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL48]], align 4 @@ -2573,72 +2625,68 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED53:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED54:%.*]] = alloca 
[[STRUCT_ANON_8:%.*]], align 4 +// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR55:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_LASTITER70:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_LOWERBOUND71:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_UPPERBOUND72:%.*]] = alloca i32, align 4 -+// CHECK-DEBUG-NEXT: [[P_STRIDE73:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION45:%.*]] +// CHECK-DEBUG: omp.par.region45: -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I52]], metadata [[META250:![0-9]+]], metadata !DIExpression()), !dbg [[DBG256:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[I52]], align 4, !dbg [[DBG256]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[AGG_CAPTURED53]], i32 0, i32 0, !dbg [[DBG257:![0-9]+]] -+// CHECK-DEBUG-NEXT: store ptr [[I52]], ptr [[TMP2]], align 8, !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED54]], i32 0, i32 0, !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I52]], align 4, !dbg [[DBG258:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.9(ptr [[DOTCOUNT_ADDR55]], ptr [[AGG_CAPTURED53]]), !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: [[DOTCOUNT56:%.*]] = load i32, ptr [[DOTCOUNT_ADDR55]], align 4, !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER57:%.*]], !dbg [[DBG257]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I52]], [[META251:![0-9]+]], !DIExpression(), [[META257:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[I52]], align 4, !dbg [[META257]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_7]], ptr [[AGG_CAPTURED53]], i32 0, i32 0, !dbg [[DBG258:![0-9]+]] ++// CHECK-DEBUG-NEXT: store ptr [[I52]], ptr [[TMP2]], align 8, !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED54]], i32 0, i32 0, !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I52]], align 4, !dbg [[DBG259:![0-9]+]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.9(ptr [[DOTCOUNT_ADDR55]], ptr [[AGG_CAPTURED53]]), !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: [[DOTCOUNT56:%.*]] = load i32, ptr [[DOTCOUNT_ADDR55]], align 4, !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER57:%.*]], !dbg [[DBG258]] +// CHECK-DEBUG: omp_loop.preheader57: -+// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND71]], align 4, !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT56]], 1, !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND72]], align 4, !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE73]], align 4, !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM74:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB25:[0-9]+]]), !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB25]], i32 [[OMP_GLOBAL_THREAD_NUM74]], i32 34, ptr [[P_LASTITER70]], ptr [[P_LOWERBOUND71]], ptr [[P_UPPERBOUND72]], ptr [[P_STRIDE73]], i32 1, i32 0), !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND71]], align 4, !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND72]], align 4, !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS175:%.*]] = sub i32 
[[TMP7]], [[TMP6]], !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[TRIP_COUNT_MINUS175]], 1, !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER58:%.*]], !dbg [[DBG257]] ++// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND71]], align 4, !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT56]], 1, !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND72]], align 4, !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE73]], align 4, !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM74:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB25:[0-9]+]]), !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB25]], i32 [[OMP_GLOBAL_THREAD_NUM74]], i32 34, ptr [[P_LASTITER70]], ptr [[P_LOWERBOUND71]], ptr [[P_UPPERBOUND72]], ptr [[P_STRIDE73]], i32 1, i32 0), !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND71]], align 4, !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND72]], align 4, !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: [[TRIP_COUNT_MINUS175:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[TRIP_COUNT_MINUS175]], 1, !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER58:%.*]], !dbg [[DBG258]] +// CHECK-DEBUG: omp_loop.header58: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV64:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER57]] ], [ [[OMP_LOOP_NEXT66:%.*]], [[OMP_LOOP_INC61:%.*]] ], !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND59:%.*]], !dbg [[DBG257]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV64:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER57]] ], [ [[OMP_LOOP_NEXT66:%.*]], [[OMP_LOOP_INC61:%.*]] ], !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND59:%.*]], !dbg [[DBG258]] +// CHECK-DEBUG: omp_loop.cond59: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP65:%.*]] = icmp ult i32 [[OMP_LOOP_IV64]], [[TMP8]], !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP65]], label [[OMP_LOOP_BODY60:%.*]], label [[OMP_LOOP_EXIT62:%.*]], !dbg [[DBG257]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP65:%.*]] = icmp ult i32 [[OMP_LOOP_IV64]], [[TMP8]], !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP65]], label [[OMP_LOOP_BODY60:%.*]], label [[OMP_LOOP_EXIT62:%.*]], !dbg [[DBG258]] +// CHECK-DEBUG: omp_loop.exit62: -+// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB25]], i32 [[OMP_GLOBAL_THREAD_NUM74]]), !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM76:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB25]]), !dbg [[DBG259:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB26:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM76]]), !dbg [[DBG259]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER63:%.*]], !dbg [[DBG257]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB25]], i32 [[OMP_GLOBAL_THREAD_NUM74]]), !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM76:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB25]]), !dbg [[DBG260:![0-9]+]] ++// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB26:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM76]]), !dbg [[DBG260]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER63:%.*]], !dbg [[DBG258]] +// CHECK-DEBUG: omp_loop.after63: -+// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION45_PARALLEL_AFTER:%.*]], !dbg [[DBG260:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION45_PARALLEL_AFTER:%.*]], !dbg [[DBG261:![0-9]+]] +// CHECK-DEBUG: 
omp.par.region45.parallel.after: +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE46:%.*]] +// CHECK-DEBUG: omp.par.pre_finalize46: -+// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG260]] ++// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG261]] +// CHECK-DEBUG: omp_loop.body60: -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[OMP_LOOP_IV64]], [[TMP6]], !dbg [[DBG259]] -+// CHECK-DEBUG-NEXT: call void @__captured_stmt.10(ptr [[I52]], i32 [[TMP9]], ptr [[AGG_CAPTURED54]]), !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG261:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV67:%.*]] = sitofp i32 [[TMP10]] to double, !dbg [[DBG261]] -+// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG259]] -+// CHECK-DEBUG-NEXT: [[ADD68:%.*]] = fadd double [[CONV67]], [[TMP11]], !dbg [[DBG262:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[CONV69:%.*]] = fptrunc double [[ADD68]] to float, !dbg [[DBG261]] -+// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG263:![0-9]+]] -+// CHECK-DEBUG-NEXT: store float [[CONV69]], ptr [[TMP12]], align 4, !dbg [[DBG264:![0-9]+]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC61]], !dbg [[DBG257]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[OMP_LOOP_IV64]], [[TMP6]], !dbg [[DBG260]] ++// CHECK-DEBUG-NEXT: call void @__captured_stmt.10(ptr [[I52]], i32 [[TMP9]], ptr [[AGG_CAPTURED54]]), !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG262:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV67:%.*]] = sitofp i32 [[TMP10]] to double, !dbg [[DBG262]] ++// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG260]] ++// CHECK-DEBUG-NEXT: [[ADD68:%.*]] = fadd double [[CONV67]], [[TMP11]], !dbg [[DBG263:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[CONV69:%.*]] = fptrunc double [[ADD68]] to float, !dbg [[DBG262]] ++// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG264:![0-9]+]] ++// CHECK-DEBUG-NEXT: store float [[CONV69]], ptr [[TMP12]], align 4, !dbg [[DBG265:![0-9]+]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC61]], !dbg [[DBG258]] +// CHECK-DEBUG: omp_loop.inc61: -+// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT66]] = add nuw i32 [[OMP_LOOP_IV64]], 1, !dbg [[DBG257]] -+// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER58]], !dbg [[DBG257]] ++// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT66]] = add nuw i32 [[OMP_LOOP_IV64]], 1, !dbg [[DBG258]] ++// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER58]], !dbg [[DBG258]] // CHECK-DEBUG: omp.par.outlined.exit.exitStub: // CHECK-DEBUG-NEXT: ret void // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.5 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG266:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG265:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG265:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG266:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // 
CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -2646,109 +2694,109 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META267:![0-9]+]], metadata !DIExpression()), !dbg [[DBG268:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META266:![0-9]+]], metadata !DIExpression()), !dbg [[DBG267:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META266:![0-9]+]], !DIExpression(), [[META267:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META267:![0-9]+]], !DIExpression(), [[META268:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META269:![0-9]+]], metadata !DIExpression()), !dbg [[DBG268]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META268:![0-9]+]], metadata !DIExpression()), !dbg [[DBG267]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META268:![0-9]+]], !DIExpression(), [[META267]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META269:![0-9]+]], !DIExpression(), [[META268]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META270:![0-9]+]], metadata !DIExpression()), !dbg [[DBG272:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG273:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG273]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG273]] --// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG272]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META275:![0-9]+]], metadata !DIExpression()), !dbg [[DBG276:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META277:![0-9]+]], metadata !DIExpression()), !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG276]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META269:![0-9]+]], metadata !DIExpression()), !dbg [[DBG271:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG272:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG272]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG272]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, 
!dbg [[DBG271]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META274:![0-9]+]], metadata !DIExpression()), !dbg [[DBG275:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META276:![0-9]+]], metadata !DIExpression()), !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG275]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META269:![0-9]+]], !DIExpression(), [[META271:![0-9]+]]) +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG272:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG272]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG272]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META271]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META274:![0-9]+]], !DIExpression(), [[META275:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META275]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META276:![0-9]+]], !DIExpression(), [[META275]]) +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META275]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META275]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META275]] +-// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META275]] +-// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META275]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META270:![0-9]+]], !DIExpression(), [[META272:![0-9]+]]) ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG273:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG273]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG273]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META272]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META275:![0-9]+]], !DIExpression(), [[META276:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META276]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META277:![0-9]+]], !DIExpression(), [[META276]]) ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META276]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META276]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META276]] ++// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META276]] ++// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META276]] // CHECK-DEBUG: cond.true: --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr 
[[DOTSTART]], align 4, !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG276]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG275]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META275]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META275]] +-// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META275]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META275]] +-// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META275]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META275]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META275]] +-// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META275]] +-// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META275]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META276]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META276]] ++// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META276]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META276]] ++// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META276]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META276]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META276]] ++// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META276]] ++// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META276]] // CHECK-DEBUG: cond.false: --// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG276]] -+// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG275]] +-// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META275]] ++// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META276]] // CHECK-DEBUG: cond.end: --// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG276]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG278:![0-9]+]] -+// CHECK-DEBUG-NEXT: 
[[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG275]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG277:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META275]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META275]] +-// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META275]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG277:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META276]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META276]] ++// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META276]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG278:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.6 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG280:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG279:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG279:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG280:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META281:![0-9]+]], metadata !DIExpression()), !dbg [[DBG282:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META280:![0-9]+]], metadata !DIExpression()), !dbg [[DBG281:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META280:![0-9]+]], !DIExpression(), [[META281:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META281:![0-9]+]], !DIExpression(), [[META282:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META283:![0-9]+]], metadata !DIExpression()), !dbg [[DBG282]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META282:![0-9]+]], metadata !DIExpression()), !dbg [[DBG281]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META282:![0-9]+]], !DIExpression(), [[META281]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META283:![0-9]+]], !DIExpression(), [[META282]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META284:![0-9]+]], metadata !DIExpression()), !dbg 
[[DBG282]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META283:![0-9]+]], metadata !DIExpression()), !dbg [[DBG281]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META283:![0-9]+]], !DIExpression(), [[META281]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META284:![0-9]+]], !DIExpression(), [[META282]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG285:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG285]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG287:![0-9]+]] --// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG287]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG287]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG287]] --// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG282]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG285]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG284:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG284]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG286:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG286]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG286]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG286]] -+// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG281]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG284]] +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG284:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG284]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG286:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG286]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG286]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG286]] +-// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META281]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG284]] ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG285:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG285]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG287:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG287]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG287]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG287]] ++// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META282]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG285]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.7 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG288:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr 
noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG287:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG287:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG288:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -2756,109 +2804,109 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META289:![0-9]+]], metadata !DIExpression()), !dbg [[DBG290:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META288:![0-9]+]], metadata !DIExpression()), !dbg [[DBG289:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META288:![0-9]+]], !DIExpression(), [[META289:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META289:![0-9]+]], !DIExpression(), [[META290:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META291:![0-9]+]], metadata !DIExpression()), !dbg [[DBG290]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META290:![0-9]+]], metadata !DIExpression()), !dbg [[DBG289]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META290:![0-9]+]], !DIExpression(), [[META289]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META291:![0-9]+]], !DIExpression(), [[META290]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META292:![0-9]+]], metadata !DIExpression()), !dbg [[DBG294:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG295:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG295]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG295]] --// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG294]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META297:![0-9]+]], metadata !DIExpression()), !dbg [[DBG298:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META299:![0-9]+]], metadata !DIExpression()), !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]], !dbg [[DBG298]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META291:![0-9]+]], metadata !DIExpression()), !dbg [[DBG293:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG294:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG294]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG294]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG293]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META296:![0-9]+]], metadata !DIExpression()), !dbg [[DBG297:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META298:![0-9]+]], metadata !DIExpression()), !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG297]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META291:![0-9]+]], !DIExpression(), [[META293:![0-9]+]]) +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG294:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG294]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG294]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META293]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META296:![0-9]+]], !DIExpression(), [[META297:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META297]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META298:![0-9]+]], !DIExpression(), [[META297]]) +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META297]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META297]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META297]] +-// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META297]] +-// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META297]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META292:![0-9]+]], !DIExpression(), [[META294:![0-9]+]]) ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG295:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG295]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG295]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META294]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META297:![0-9]+]], !DIExpression(), [[META298:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META298]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META299:![0-9]+]], !DIExpression(), [[META298]]) ++// CHECK-DEBUG-NEXT: store i32 1, ptr 
[[DOTSTEP]], align 4, !dbg [[META298]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META298]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META298]] ++// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META298]] ++// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META298]] // CHECK-DEBUG: cond.true: --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG298]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG297]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META297]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META297]] +-// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META297]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META297]] +-// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META297]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META297]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META297]] +-// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META297]] +-// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META297]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META298]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META298]] ++// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META298]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META298]] ++// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META298]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META298]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META298]] ++// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META298]] ++// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META298]] // CHECK-DEBUG: cond.false: --// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG298]] -+// 
CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG297]] +-// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META297]] ++// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META298]] // CHECK-DEBUG: cond.end: --// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG298]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG300:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG297]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG299:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META297]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META297]] +-// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META297]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG299:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META298]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META298]] ++// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META298]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG300:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.8 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG302:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG301:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG301:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG302:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META303:![0-9]+]], metadata !DIExpression()), !dbg [[DBG304:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META302:![0-9]+]], metadata !DIExpression()), !dbg [[DBG303:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META302:![0-9]+]], !DIExpression(), [[META303:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META303:![0-9]+]], !DIExpression(), [[META304:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META305:![0-9]+]], metadata !DIExpression()), !dbg [[DBG304]] -+// 
CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META304:![0-9]+]], metadata !DIExpression()), !dbg [[DBG303]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META304:![0-9]+]], !DIExpression(), [[META303]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META305:![0-9]+]], !DIExpression(), [[META304]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META306:![0-9]+]], metadata !DIExpression()), !dbg [[DBG304]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META305:![0-9]+]], metadata !DIExpression()), !dbg [[DBG303]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META305:![0-9]+]], !DIExpression(), [[META303]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META306:![0-9]+]], !DIExpression(), [[META304]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG307:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG307]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG309:![0-9]+]] --// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG309]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG309]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG309]] --// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG304]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG307]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG306:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG306]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG308:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG308]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG308]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG308]] -+// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG303]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG306]] +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG306:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG306]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG308:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG308]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG308]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG308]] +-// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META303]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG306]] ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG307:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG307]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG309:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 
1, [[TMP3]], !dbg [[DBG309]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG309]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG309]] ++// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META304]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG307]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.9 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG310:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG309:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG309:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG310:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -2866,109 +2914,109 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META311:![0-9]+]], metadata !DIExpression()), !dbg [[DBG312:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META310:![0-9]+]], metadata !DIExpression()), !dbg [[DBG311:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META310:![0-9]+]], !DIExpression(), [[META311:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META311:![0-9]+]], !DIExpression(), [[META312:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META313:![0-9]+]], metadata !DIExpression()), !dbg [[DBG312]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META312:![0-9]+]], metadata !DIExpression()), !dbg [[DBG311]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META312:![0-9]+]], !DIExpression(), [[META311]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META313:![0-9]+]], !DIExpression(), [[META312]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META314:![0-9]+]], metadata !DIExpression()), !dbg [[DBG316:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG317:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG317]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG317]] --// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG316]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META319:![0-9]+]], metadata !DIExpression()), !dbg [[DBG320:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, 
!dbg [[DBG320]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META321:![0-9]+]], metadata !DIExpression()), !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG320]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META313:![0-9]+]], metadata !DIExpression()), !dbg [[DBG315:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG316:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG316]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG316]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG315]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META318:![0-9]+]], metadata !DIExpression()), !dbg [[DBG319:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META320:![0-9]+]], metadata !DIExpression()), !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG319]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META313:![0-9]+]], !DIExpression(), [[META315:![0-9]+]]) +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG316:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG316]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG316]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META315]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META318:![0-9]+]], !DIExpression(), [[META319:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META319]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META320:![0-9]+]], !DIExpression(), [[META319]]) +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META319]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META319]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META319]] +-// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META319]] +-// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META319]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META314:![0-9]+]], !DIExpression(), [[META316:![0-9]+]]) ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG317:![0-9]+]] ++// CHECK-DEBUG-NEXT: 
[[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG317]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG317]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META316]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META319:![0-9]+]], !DIExpression(), [[META320:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META320]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META321:![0-9]+]], !DIExpression(), [[META320]]) ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META320]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META320]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META320]] ++// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META320]] ++// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META320]] // CHECK-DEBUG: cond.true: --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG320]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG319]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META319]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META319]] +-// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META319]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META319]] +-// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META319]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META319]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META319]] +-// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META319]] +-// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META319]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META320]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META320]] ++// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META320]] ++// 
CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META320]] ++// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META320]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META320]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META320]] ++// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META320]] ++// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META320]] // CHECK-DEBUG: cond.false: --// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG320]] -+// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG319]] +-// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META319]] ++// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META320]] // CHECK-DEBUG: cond.end: --// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG320]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG322:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG319]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG321:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META319]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META319]] +-// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META319]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG321:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META320]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META320]] ++// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META320]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG322:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.10 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG324:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG323:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG323:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG324:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META325:![0-9]+]], metadata !DIExpression()), !dbg [[DBG326:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr 
[[LOOPVAR_ADDR]], metadata [[META324:![0-9]+]], metadata !DIExpression()), !dbg [[DBG325:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META324:![0-9]+]], !DIExpression(), [[META325:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META325:![0-9]+]], !DIExpression(), [[META326:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META327:![0-9]+]], metadata !DIExpression()), !dbg [[DBG326]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META326:![0-9]+]], metadata !DIExpression()), !dbg [[DBG325]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META326:![0-9]+]], !DIExpression(), [[META325]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META327:![0-9]+]], !DIExpression(), [[META326]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META328:![0-9]+]], metadata !DIExpression()), !dbg [[DBG326]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META327:![0-9]+]], metadata !DIExpression()), !dbg [[DBG325]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META327:![0-9]+]], !DIExpression(), [[META325]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META328:![0-9]+]], !DIExpression(), [[META326]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG329:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG329]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG331:![0-9]+]] --// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG331]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG331]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG331]] --// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG326]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG329]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG328:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG328]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG330:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG330]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG330]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG330]] -+// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG325]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG328]] +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG328:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG328]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG330:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG330]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG330]] +-// CHECK-DEBUG-NEXT: 
[[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG330]] +-// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META325]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG328]] ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG329:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG329]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG331:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG331]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG331]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG331]] ++// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META326]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG329]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.11 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG332:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG331:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG331:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG332:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -2976,109 +3024,109 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META333:![0-9]+]], metadata !DIExpression()), !dbg [[DBG334:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META332:![0-9]+]], metadata !DIExpression()), !dbg [[DBG333:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META332:![0-9]+]], !DIExpression(), [[META333:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META333:![0-9]+]], !DIExpression(), [[META334:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META335:![0-9]+]], metadata !DIExpression()), !dbg [[DBG334]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META334:![0-9]+]], metadata !DIExpression()), !dbg [[DBG333]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META334:![0-9]+]], !DIExpression(), [[META333]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META335:![0-9]+]], !DIExpression(), [[META334]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META336:![0-9]+]], metadata !DIExpression()), !dbg [[DBG338:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG339:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG339]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG339]] --// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG338]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META341:![0-9]+]], metadata !DIExpression()), !dbg [[DBG342:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META343:![0-9]+]], metadata !DIExpression()), !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG342]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META335:![0-9]+]], metadata !DIExpression()), !dbg [[DBG337:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG338:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG338]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG338]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG337]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META340:![0-9]+]], metadata !DIExpression()), !dbg [[DBG341:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META342:![0-9]+]], metadata !DIExpression()), !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG341]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META335:![0-9]+]], !DIExpression(), [[META337:![0-9]+]]) +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG338:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG338]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG338]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META337]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META340:![0-9]+]], !DIExpression(), [[META341:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META341]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META342:![0-9]+]], !DIExpression(), [[META341]]) +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META341]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, 
!dbg [[META341]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META341]] +-// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META341]] +-// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META341]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META336:![0-9]+]], !DIExpression(), [[META338:![0-9]+]]) ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG339:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG339]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG339]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META338]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META341:![0-9]+]], !DIExpression(), [[META342:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META342]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META343:![0-9]+]], !DIExpression(), [[META342]]) ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META342]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META342]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META342]] ++// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META342]] ++// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META342]] // CHECK-DEBUG: cond.true: --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG342]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG341]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META341]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META341]] +-// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META341]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META341]] +-// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META341]] +-// CHECK-DEBUG-NEXT: 
[[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META341]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META341]] +-// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META341]] +-// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META341]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META342]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META342]] ++// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META342]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META342]] ++// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META342]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META342]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META342]] ++// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META342]] ++// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META342]] // CHECK-DEBUG: cond.false: --// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG342]] -+// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG341]] +-// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META341]] ++// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META342]] // CHECK-DEBUG: cond.end: --// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG342]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG344:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG341]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG343:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META341]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META341]] +-// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META341]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG343:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META342]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META342]] ++// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META342]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG344:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.12 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG346:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG345:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG345:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef 
[[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG346:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META347:![0-9]+]], metadata !DIExpression()), !dbg [[DBG348:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META346:![0-9]+]], metadata !DIExpression()), !dbg [[DBG347:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META346:![0-9]+]], !DIExpression(), [[META347:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META347:![0-9]+]], !DIExpression(), [[META348:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META349:![0-9]+]], metadata !DIExpression()), !dbg [[DBG348]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META348:![0-9]+]], metadata !DIExpression()), !dbg [[DBG347]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META348:![0-9]+]], !DIExpression(), [[META347]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META349:![0-9]+]], !DIExpression(), [[META348]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META350:![0-9]+]], metadata !DIExpression()), !dbg [[DBG348]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META349:![0-9]+]], metadata !DIExpression()), !dbg [[DBG347]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META349:![0-9]+]], !DIExpression(), [[META347]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META350:![0-9]+]], !DIExpression(), [[META348]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG351:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG351]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG353:![0-9]+]] --// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG353]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG353]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG353]] --// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG348]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG351]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG350:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG350]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG352:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG352]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG352]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG352]] -+// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, 
!dbg [[DBG347]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG350]] +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG350:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG350]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG352:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG352]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG352]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG352]] +-// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META347]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG350]] ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG351:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG351]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG353:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG353]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG353]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG353]] ++// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META348]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG351]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.13 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG354:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG353:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG353:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG354:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -3086,109 +3134,109 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META355:![0-9]+]], metadata !DIExpression()), !dbg [[DBG356:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META354:![0-9]+]], metadata !DIExpression()), !dbg [[DBG355:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META354:![0-9]+]], !DIExpression(), [[META355:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META355:![0-9]+]], !DIExpression(), [[META356:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META357:![0-9]+]], metadata !DIExpression()), !dbg [[DBG356]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], 
metadata [[META356:![0-9]+]], metadata !DIExpression()), !dbg [[DBG355]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META356:![0-9]+]], !DIExpression(), [[META355]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META357:![0-9]+]], !DIExpression(), [[META356]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META358:![0-9]+]], metadata !DIExpression()), !dbg [[DBG360:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG361:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG361]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG361]] --// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG360]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META363:![0-9]+]], metadata !DIExpression()), !dbg [[DBG364:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META365:![0-9]+]], metadata !DIExpression()), !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG364]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META357:![0-9]+]], metadata !DIExpression()), !dbg [[DBG359:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG360:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG360]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG360]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG359]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META362:![0-9]+]], metadata !DIExpression()), !dbg [[DBG363:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META364:![0-9]+]], metadata !DIExpression()), !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG363]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META357:![0-9]+]], !DIExpression(), [[META359:![0-9]+]]) +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG360:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG360]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr 
[[TMP2]], align 4, !dbg [[DBG360]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META359]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META362:![0-9]+]], !DIExpression(), [[META363:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META363]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META364:![0-9]+]], !DIExpression(), [[META363]]) +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META363]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META363]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META363]] +-// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META363]] +-// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META363]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META358:![0-9]+]], !DIExpression(), [[META360:![0-9]+]]) ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG361:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG361]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG361]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META360]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META363:![0-9]+]], !DIExpression(), [[META364:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META364]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META365:![0-9]+]], !DIExpression(), [[META364]]) ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META364]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META364]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META364]] ++// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META364]] ++// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META364]] // CHECK-DEBUG: cond.true: --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG364]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: [[DIV:%.*]] = 
udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG363]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META363]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META363]] +-// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META363]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META363]] +-// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META363]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META363]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META363]] +-// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META363]] +-// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META363]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META364]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META364]] ++// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META364]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META364]] ++// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META364]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META364]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META364]] ++// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META364]] ++// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META364]] // CHECK-DEBUG: cond.false: --// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG364]] -+// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG363]] +-// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META363]] ++// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META364]] // CHECK-DEBUG: cond.end: --// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG364]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG366:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG363]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG365:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META363]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META363]] +-// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META363]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG365:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META364]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META364]] ++// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META364]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG366:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.14 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) 
#[[ATTR4]] !dbg [[DBG368:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG367:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG367:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG368:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META369:![0-9]+]], metadata !DIExpression()), !dbg [[DBG370:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META368:![0-9]+]], metadata !DIExpression()), !dbg [[DBG369:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META368:![0-9]+]], !DIExpression(), [[META369:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META369:![0-9]+]], !DIExpression(), [[META370:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META371:![0-9]+]], metadata !DIExpression()), !dbg [[DBG370]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META370:![0-9]+]], metadata !DIExpression()), !dbg [[DBG369]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META370:![0-9]+]], !DIExpression(), [[META369]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META371:![0-9]+]], !DIExpression(), [[META370]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META372:![0-9]+]], metadata !DIExpression()), !dbg [[DBG370]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META371:![0-9]+]], metadata !DIExpression()), !dbg [[DBG369]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META371:![0-9]+]], !DIExpression(), [[META369]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META372:![0-9]+]], !DIExpression(), [[META370]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG373:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG373]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG375:![0-9]+]] --// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG375]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG375]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG375]] --// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG370]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG373]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG372:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG372]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG374:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG374]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG374]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG374]] -+// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG369]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG372]] +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG372:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG372]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG374:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG374]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG374]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG374]] +-// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META369]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG372]] ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG373:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG373]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG375:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG375]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG375]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG375]] ++// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META370]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG373]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.15 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG376:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG375:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG375:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG376:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -3196,109 +3244,109 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META377:![0-9]+]], metadata !DIExpression()), !dbg [[DBG378:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META376:![0-9]+]], metadata !DIExpression()), 
!dbg [[DBG377:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META376:![0-9]+]], !DIExpression(), [[META377:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META377:![0-9]+]], !DIExpression(), [[META378:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META379:![0-9]+]], metadata !DIExpression()), !dbg [[DBG378]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META378:![0-9]+]], metadata !DIExpression()), !dbg [[DBG377]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META378:![0-9]+]], !DIExpression(), [[META377]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META379:![0-9]+]], !DIExpression(), [[META378]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META380:![0-9]+]], metadata !DIExpression()), !dbg [[DBG382:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG383:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG383]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG383]] --// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG382]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META385:![0-9]+]], metadata !DIExpression()), !dbg [[DBG386:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META387:![0-9]+]], metadata !DIExpression()), !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG386]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META379:![0-9]+]], metadata !DIExpression()), !dbg [[DBG381:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG382:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG382]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG382]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG381]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META384:![0-9]+]], metadata !DIExpression()), !dbg [[DBG385:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META386:![0-9]+]], metadata !DIExpression()), !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG385]] -+// 
CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG385]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META379:![0-9]+]], !DIExpression(), [[META381:![0-9]+]]) +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG382:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG382]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG382]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META381]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META384:![0-9]+]], !DIExpression(), [[META385:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META385]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META386:![0-9]+]], !DIExpression(), [[META385]]) +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META385]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META385]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META385]] +-// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META385]] +-// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META385]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META380:![0-9]+]], !DIExpression(), [[META382:![0-9]+]]) ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG383:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG383]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG383]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META382]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META385:![0-9]+]], !DIExpression(), [[META386:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META386]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META387:![0-9]+]], !DIExpression(), [[META386]]) ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META386]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META386]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META386]] ++// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META386]] ++// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META386]] // CHECK-DEBUG: cond.true: --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG386]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTSTOP]], align 4, !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG385]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META385]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META385]] +-// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META385]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META385]] +-// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META385]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META385]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META385]] +-// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META385]] +-// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META385]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META386]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META386]] ++// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META386]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META386]] ++// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META386]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META386]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META386]] ++// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META386]] ++// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META386]] // CHECK-DEBUG: cond.false: --// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG386]] -+// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG385]] +-// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META385]] ++// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META386]] // CHECK-DEBUG: cond.end: --// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG386]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG388:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG385]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG387:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META385]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META385]] +-// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META385]] +-// CHECK-DEBUG-NEXT: ret void, !dbg 
[[DBG387:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META386]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META386]] ++// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META386]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG388:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.16 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG390:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG389:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG389:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG390:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META391:![0-9]+]], metadata !DIExpression()), !dbg [[DBG392:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META390:![0-9]+]], metadata !DIExpression()), !dbg [[DBG391:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META390:![0-9]+]], !DIExpression(), [[META391:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META391:![0-9]+]], !DIExpression(), [[META392:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META393:![0-9]+]], metadata !DIExpression()), !dbg [[DBG392]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META392:![0-9]+]], metadata !DIExpression()), !dbg [[DBG391]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META392:![0-9]+]], !DIExpression(), [[META391]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META393:![0-9]+]], !DIExpression(), [[META392]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META394:![0-9]+]], metadata !DIExpression()), !dbg [[DBG392]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META393:![0-9]+]], metadata !DIExpression()), !dbg [[DBG391]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META393:![0-9]+]], !DIExpression(), [[META391]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META394:![0-9]+]], !DIExpression(), [[META392]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG395:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr 
[[TMP1]], align 4, !dbg [[DBG395]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG397:![0-9]+]] --// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG397]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG397]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG397]] --// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG392]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG395]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG394:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG394]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG396:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG396]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG396]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG396]] -+// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG391]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG394]] +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG394:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG394]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG396:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG396]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG396]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG396]] +-// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META391]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG394]] ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG395:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG395]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG397:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG397]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG397]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG397]] ++// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META392]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG395]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.17 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG398:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG397:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG397:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG398:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -3306,109 +3354,109 @@ 
diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META399:![0-9]+]], metadata !DIExpression()), !dbg [[DBG400:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META398:![0-9]+]], metadata !DIExpression()), !dbg [[DBG399:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META398:![0-9]+]], !DIExpression(), [[META399:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META399:![0-9]+]], !DIExpression(), [[META400:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META401:![0-9]+]], metadata !DIExpression()), !dbg [[DBG400]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META400:![0-9]+]], metadata !DIExpression()), !dbg [[DBG399]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META400:![0-9]+]], !DIExpression(), [[META399]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META401:![0-9]+]], !DIExpression(), [[META400]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META402:![0-9]+]], metadata !DIExpression()), !dbg [[DBG404:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG405:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG405]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG405]] --// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG404]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META407:![0-9]+]], metadata !DIExpression()), !dbg [[DBG408:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META409:![0-9]+]], metadata !DIExpression()), !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG408]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META401:![0-9]+]], metadata !DIExpression()), !dbg [[DBG403:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG404:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG404]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG404]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG403]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr 
[[DOTSTOP]], metadata [[META406:![0-9]+]], metadata !DIExpression()), !dbg [[DBG407:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META408:![0-9]+]], metadata !DIExpression()), !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG407]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META401:![0-9]+]], !DIExpression(), [[META403:![0-9]+]]) +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG404:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG404]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG404]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META403]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META406:![0-9]+]], !DIExpression(), [[META407:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META407]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META408:![0-9]+]], !DIExpression(), [[META407]]) +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META407]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META407]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META407]] +-// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META407]] +-// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META407]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META402:![0-9]+]], !DIExpression(), [[META404:![0-9]+]]) ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG405:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG405]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG405]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META404]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META407:![0-9]+]], !DIExpression(), [[META408:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META408]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META409:![0-9]+]], !DIExpression(), [[META408]]) ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META408]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META408]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META408]] ++// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META408]] ++// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META408]] // CHECK-DEBUG: cond.true: --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: [[SUB:%.*]] = 
sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG408]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG407]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META407]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META407]] +-// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META407]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META407]] +-// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META407]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META407]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META407]] +-// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META407]] +-// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META407]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META408]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META408]] ++// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META408]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META408]] ++// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META408]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META408]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META408]] ++// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META408]] ++// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META408]] // CHECK-DEBUG: cond.false: --// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG408]] -+// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG407]] +-// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META407]] ++// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META408]] // CHECK-DEBUG: cond.end: --// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG408]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG410:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg 
[[DBG407]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG407]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG409:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META407]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META407]] +-// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META407]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG409:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META408]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META408]] ++// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META408]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG410:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.18 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG412:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG411:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG411:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG412:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META413:![0-9]+]], metadata !DIExpression()), !dbg [[DBG414:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META412:![0-9]+]], metadata !DIExpression()), !dbg [[DBG413:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META412:![0-9]+]], !DIExpression(), [[META413:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META413:![0-9]+]], !DIExpression(), [[META414:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META415:![0-9]+]], metadata !DIExpression()), !dbg [[DBG414]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META414:![0-9]+]], metadata !DIExpression()), !dbg [[DBG413]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META414:![0-9]+]], !DIExpression(), [[META413]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META415:![0-9]+]], !DIExpression(), [[META414]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META416:![0-9]+]], metadata !DIExpression()), !dbg [[DBG414]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr 
[[__CONTEXT_ADDR]], metadata [[META415:![0-9]+]], metadata !DIExpression()), !dbg [[DBG413]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META415:![0-9]+]], !DIExpression(), [[META413]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META416:![0-9]+]], !DIExpression(), [[META414]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG417:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG417]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG419:![0-9]+]] --// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG419]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG419]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG419]] --// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG414]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG417]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG416:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG416]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG418:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG418]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG418]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG418]] -+// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG413]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG416]] +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG416:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG416]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG418:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG418]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG418]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG418]] +-// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META413]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG416]] ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG417:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG417]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG419:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG419]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG419]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG419]] ++// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META414]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG417]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.19 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG420:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr 
noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG419:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG419:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG420:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -3416,109 +3464,470 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/clang/test/OpenMP/irbuilder_nest // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META421:![0-9]+]], metadata !DIExpression()), !dbg [[DBG422:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META420:![0-9]+]], metadata !DIExpression()), !dbg [[DBG421:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META420:![0-9]+]], !DIExpression(), [[META421:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META421:![0-9]+]], !DIExpression(), [[META422:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META423:![0-9]+]], metadata !DIExpression()), !dbg [[DBG422]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META422:![0-9]+]], metadata !DIExpression()), !dbg [[DBG421]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META422:![0-9]+]], !DIExpression(), [[META421]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META423:![0-9]+]], !DIExpression(), [[META422]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META424:![0-9]+]], metadata !DIExpression()), !dbg [[DBG426:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG427:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG427]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG427]] --// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG426]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META429:![0-9]+]], metadata !DIExpression()), !dbg [[DBG430:![0-9]+]] --// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META431:![0-9]+]], metadata !DIExpression()), !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG430]] -+// CHECK-DEBUG-NEXT: call void 
@llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META423:![0-9]+]], metadata !DIExpression()), !dbg [[DBG425:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG426:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG426]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG426]] -+// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG425]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META428:![0-9]+]], metadata !DIExpression()), !dbg [[DBG429:![0-9]+]] -+// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META430:![0-9]+]], metadata !DIExpression()), !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG429]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META423:![0-9]+]], !DIExpression(), [[META425:![0-9]+]]) +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG426:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG426]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG426]] +-// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META425]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META428:![0-9]+]], !DIExpression(), [[META429:![0-9]+]]) +-// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META429]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META430:![0-9]+]], !DIExpression(), [[META429]]) +-// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META429]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META429]] +-// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META429]] +-// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META429]] +-// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META429]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META424:![0-9]+]], !DIExpression(), [[META426:![0-9]+]]) ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG427:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG427]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG427]] ++// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META426]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META429:![0-9]+]], !DIExpression(), [[META430:![0-9]+]]) ++// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META430]] ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META431:![0-9]+]], !DIExpression(), [[META430]]) ++// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META430]] ++// CHECK-DEBUG-NEXT: [[TMP4:%.*]] 
= load i32, ptr [[DOTSTART]], align 4, !dbg [[META430]] ++// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META430]] ++// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META430]] ++// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META430]] // CHECK-DEBUG: cond.true: --// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG430]] -+// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG429]] +-// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META429]] +-// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META429]] +-// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META429]] +-// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META429]] +-// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META429]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META429]] +-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META429]] +-// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META429]] +-// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META429]] ++// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META430]] ++// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META430]] ++// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META430]] ++// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META430]] ++// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META430]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META430]] ++// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META430]] ++// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META430]] ++// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META430]] // CHECK-DEBUG: cond.false: --// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG430]] -+// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG429]] +-// CHECK-DEBUG-NEXT: br 
label [[COND_END]], !dbg [[META429]] ++// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META430]] // CHECK-DEBUG: cond.end: --// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG430]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG432:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG429]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG431:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META429]] +-// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META429]] +-// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META429]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG431:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META430]] ++// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META430]] ++// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META430]] ++// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG432:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.20 --// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG434:![0-9]+]] { -+// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG433:![0-9]+]] { +-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG433:![0-9]+]] { ++// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG434:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META435:![0-9]+]], metadata !DIExpression()), !dbg [[DBG436:![0-9]+]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META434:![0-9]+]], metadata !DIExpression()), !dbg [[DBG435:![0-9]+]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META434:![0-9]+]], !DIExpression(), [[META435:![0-9]+]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META435:![0-9]+]], !DIExpression(), [[META436:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META437:![0-9]+]], metadata !DIExpression()), !dbg [[DBG436]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata 
[[META436:![0-9]+]], metadata !DIExpression()), !dbg [[DBG435]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META436:![0-9]+]], !DIExpression(), [[META435]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META437:![0-9]+]], !DIExpression(), [[META436]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META438:![0-9]+]], metadata !DIExpression()), !dbg [[DBG436]] -+// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META437:![0-9]+]], metadata !DIExpression()), !dbg [[DBG435]] +-// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META437:![0-9]+]], !DIExpression(), [[META435]]) ++// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META438:![0-9]+]], !DIExpression(), [[META436]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 --// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG439:![0-9]+]] --// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG439]] --// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG441:![0-9]+]] --// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG441]] --// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG441]] --// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG441]] --// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG436]] --// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG439]] -+// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG438:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG438]] -+// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG440:![0-9]+]] -+// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG440]] -+// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG440]] -+// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG440]] -+// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG435]] -+// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG438]] +-// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG438:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG438]] +-// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG440:![0-9]+]] +-// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG440]] +-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG440]] +-// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG440]] +-// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META435]] +-// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG438]] ++// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG439:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG439]] ++// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG441:![0-9]+]] ++// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG441]] ++// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 
[[TMP2]], [[MUL]], !dbg [[DBG441]]
++// CHECK-DEBUG-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG441]]
++// CHECK-DEBUG-NEXT:    store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META436]]
++// CHECK-DEBUG-NEXT:    ret void, !dbg [[DBG439]]
 //
-diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Frontend/CodeGenOptions.def llvm-project/flang/include/flang/Frontend/CodeGenOptions.def
---- llvm-project.orig/flang/include/flang/Frontend/CodeGenOptions.def	2024-06-12 10:43:12.596210747 -0500
-+++ llvm-project/flang/include/flang/Frontend/CodeGenOptions.def	2024-06-12 10:44:09.347614281 -0500
-@@ -40,5 +40,7 @@
+diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/docs/DoConcurrentConversionToOpenMP.md llvm-project-aso/flang/docs/DoConcurrentConversionToOpenMP.md
+--- llvm-project-aso-orig/flang/docs/DoConcurrentConversionToOpenMP.md	1969-12-31 18:00:00.000000000 -0600
++++ llvm-project-aso/flang/docs/DoConcurrentConversionToOpenMP.md	2024-11-23 20:39:47.172175395 -0600
+@@ -0,0 +1,332 @@
++
++
++# `DO CONCURRENT` mapping to OpenMP
++
++```{contents}
++---
++local:
++---
++```
++
++This document seeks to describe the effort to parallelize `do concurrent` loops
++by mapping them to OpenMP worksharing constructs. The goals of this document
++are:
++* Describing how to instruct `flang-new` to map `DO CONCURRENT` loops to OpenMP
++  constructs.
++* Tracking the current status of such mapping.
++* Describing the limitations of the current implementation.
++* Describing next steps.
++
++## Usage
++
++In order to enable `do concurrent` to OpenMP mapping, `flang-new` adds a new
++compiler flag: `-fdo-concurrent-parallel`. This flag has 3 possible values:
++1. `host`: this maps `do concurrent` loops to run in parallel on the host CPU.
++   This maps such loops to the equivalent of `omp parallel do`.
++2. `device`: this maps `do concurrent` loops to run in parallel on a device
++   (GPU). This maps such loops to the equivalent of `omp target teams
++   distribute parallel do`.
++3. `none`: this disables `do concurrent` mapping altogether. In that case, such
++   loops are emitted as sequential loops.
++
++The above compiler switch is currently available only when OpenMP is also
++enabled. So you need to provide the following options to flang in order to
++enable it:
++```
++flang-new ... -fopenmp -fdo-concurrent-parallel=[host|device|none] ...
++```
++
++## Current status
++
++Under the hood, `do concurrent` mapping is implemented in the
++`DoConcurrentConversionPass`. This is still an experimental pass, which means
++that:
++* It has been tested in a very limited way so far.
++* It has been tested on simple synthetic inputs.
++
++To describe the current status in more detail, the following is a description
++of how the pass currently behaves for single-range loops and then for
++multi-range loops.
++
++### Single-range loops
++
++Given the following loop:
++```fortran
++  do concurrent(i=1:n)
++    a(i) = i * i
++  end do
++```
++
++#### Mapping to `host`
++
++Mapping this loop to the `host` generates MLIR operations of the following
++structure:
++
++```mlir
++%4 = fir.address_of(@_QFEa) ...
++%6:2 = hlfir.declare %4 ...
++
++omp.parallel {
++  // Allocate private copy for `i`.
++  %19 = fir.alloca i32 {bindc_name = "i"}
++  %20:2 = hlfir.declare %19 {uniq_name = "_QFEi"} ...
++
++  omp.wsloop {
++    omp.loop_nest (%arg0) : index = (%21) to (%22) inclusive step (%c1_2) {
++      %23 = fir.convert %arg0 : (index) -> i32
++      // Use the privatized version of `i`.
++      fir.store %23 to %20#1 : !fir.ref
++      ...
++
++      // Use "shared" SSA value of `a`.
++      %42 = hlfir.designate %6#0
++      hlfir.assign %35 to %42
++      ...
++      omp.yield
++    }
++    omp.terminator
++  }
++  omp.terminator
++}
++```
++
++#### Mapping to `device`
++
++Mapping the same loop to the `device` generates MLIR operations of the
++following structure:
++
++```mlir
++// Map `a` to the `target` region.
++%29 = omp.map.info ... {name = "_QFEa"}
++omp.target ... map_entries(..., %29 -> %arg4 ...) {
++  ...
++  %51:2 = hlfir.declare %arg4
++  ...
++  omp.teams {
++    // Allocate private copy for `i`.
++    %52 = fir.alloca i32 {bindc_name = "i"}
++    %53:2 = hlfir.declare %52
++    ...
++
++    omp.distribute {
++      omp.parallel {
++        omp.wsloop {
++          omp.loop_nest (%arg5) : index = (%54) to (%55) inclusive step (%c1_9) {
++            // Use the privatized version of `i`.
++            %56 = fir.convert %arg5 : (index) -> i32
++            fir.store %56 to %53#1
++            ...
++            // Use the mapped version of `a`.
++            ... = hlfir.designate %51#0
++            ...
++          }
++          omp.terminator
++        }
++        omp.terminator
++      }
++      omp.terminator
++    }
++    omp.terminator
++  }
++  omp.terminator
++}
++```
++
++### Multi-range loops
++
++The pass currently supports multi-range loops as well. Given the following
++example:
++
++```fortran
++  do concurrent(i=1:n, j=1:m)
++    a(i,j) = i * j
++  end do
++```
++
++The generated `omp.loop_nest` operation looks like:
++
++```mlir
++omp.loop_nest (%arg0, %arg1)
++    : index = (%17, %19) to (%18, %20)
++    inclusive step (%c1_2, %c1_4) {
++  fir.store %arg0 to %private_i#1 : !fir.ref
++  fir.store %arg1 to %private_j#1 : !fir.ref
++  ...
++  omp.yield
++}
++```
++
++It is worth noting that we have privatized versions for both iteration
++variables: `i` and `j`. These are locally allocated inside the parallel/target
++OpenMP region similar to what the single-range example in the previous section
++shows.
++
++#### Multi-range and perfectly-nested loops
++
++Currently, on the `FIR` dialect level, the following 2 loops are modelled in
++exactly the same way:
++
++```fortran
++do concurrent(i=1:n, j=1:m)
++  a(i,j) = i * j
++end do
++```
++
++```fortran
++do concurrent(i=1:n)
++  do concurrent(j=1:m)
++    a(i,j) = i * j
++  end do
++end do
++```
++
++Both of the above loops are modelled as:
++
++```mlir
++fir.do_loop %arg0 = %11 to %12 step %c1 unordered {
++  ...
++  fir.do_loop %arg1 = %14 to %15 step %c1_1 unordered {
++    ...
++  }
++}
++```
++
++Consequently, from the `DoConcurrentConversionPass`' perspective, both loops
++are treated in the same manner. Under the hood, the pass detects
++perfectly-nested loop nests and maps such nests as if they were multi-range
++loops.
++
++#### Non-perfectly-nested loops
++
++One limitation that the pass currently has is that it treats any intervening
++code in a loop nest as being disruptive to detecting that nest as a single
++unit. For example, given the following input:
++
++```fortran
++do concurrent(i=1:n)
++  x = 41
++  do concurrent(j=1:m)
++    a(i,j) = i * j
++  end do
++end do
++```
++
++Since there is at least one statement between the two loop headers (i.e.
++`x = 41`), the pass does not detect the `i` and `j` loops as a nest. Rather,
++the pass in that case only maps the `i` loop to OpenMP and leaves the `j` loop
++in its original form. In theory, in this example, we can sink the intervening
++code into the `j` loop and detect the complete nest. However, such a
++transformation still needs to be implemented.
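++
++As a purely illustrative sketch (the sinking transformation is not implemented
++yet, and the legality analysis it would require is left aside here), the
++conceptually sunk form of the example above would look as follows:
++
++```fortran
++do concurrent(i=1:n)
++  do concurrent(j=1:m)
++    ! Hypothetical output: the intervening statement has been sunk into the
++    ! `j` loop so that the two loops form a perfect nest again.
++    x = 41
++    a(i,j) = i * j
++  end do
++end do
++```
++
++Until such sinking is implemented, the two loops are handled separately.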
++
++The above also has the consequence that the `j` variable will **not** be
++privatized in the OpenMP parallel/target region. In other words, it will be
++treated as if it were a `shared` variable. For more details about privatization,
++see the "Data environment" section below.
++
++### Data environment
++
++By default, variables that are used inside a `do concurrent` loop nest are
++either treated as `shared` in case of mapping to `host`, or mapped into the
++`target` region using a `map` clause in case of mapping to `device`. The only
++exceptions to this are:
++  1. the loop's iteration variable(s) (IV) of **perfect** loop nests. In that
++     case, for each IV, we allocate a local copy as shown by the mapping
++     examples above.
++  1. any values that result from allocations outside the loop nest and are
++     used exclusively inside of it. In such cases, a local privatized value is
++     created in the OpenMP region to prevent multiple teams of threads from
++     accessing and destroying the same memory block, which causes runtime
++     issues. For an example of such cases, see
++     `flang/test/Transforms/DoConcurrent/locally_destroyed_temp.f90`.
++
++#### Non-perfectly-nested loops' IVs
++
++For non-perfectly-nested loops, the IVs are still treated as `shared` or
++`map` entries as pointed out above. This **might not** be consistent with what
++the Fortran specification tells us. In particular, taking the following
++snippets from the spec (version 2023) into account:
++
++> § 3.35
++> ------
++> construct entity
++> entity whose identifier has the scope of a construct
++
++> § 19.4
++> ------
++> A variable that appears as an index-name in a FORALL or DO CONCURRENT
++> construct, or ... is a construct entity. A variable that has LOCAL or
++> LOCAL_INIT locality in a DO CONCURRENT construct is a construct entity.
++> ...
++> The name of a variable that appears as an index-name in a DO CONCURRENT
++> construct, FORALL statement, or FORALL construct has a scope of the statement
++> or construct. A variable that has LOCAL or LOCAL_INIT locality in a DO
++> CONCURRENT construct has the scope of that construct.
++
++From the above quotes, it seems there is an equivalence between the IV of a `do
++concurrent` loop and a variable with a `LOCAL` locality specifier (equivalent
++to OpenMP's `private` clause). This means that we should probably
++localize/privatize a `do concurrent` loop's IV even if it is not perfectly
++nested in the nest we are parallelizing. For now, however, we **do not** do
++that as pointed out previously. In the near future, we propose a middle-ground
++solution (see the Next steps section for more details).
++
++## Next steps
++
++### Delayed privatization
++
++So far, we emit the privatization logic for IVs inline in the parallel/target
++region. This is enough for our purposes right now since we don't
++localize/privatize any sophisticated types of variables yet. Once we need
++more advanced localization through `do concurrent`'s locality specifiers
++(see below), delayed privatization will enable us to have a much cleaner IR.
++Once the upstream implementation of delayed privatization supports the
++constructs required by the pass, we will move to it rather than inlined/early
++privatization.
++
++### Locality specifiers for `do concurrent`
++
++Locality specifiers will enable the user to control the data environment of the
++loop nest in a more fine-grained way.
Implementing these specifiers on the
++`FIR` dialect level is needed in order to support this in the
++`DoConcurrentConversionPass`.
++
++Such specifiers will also unlock a potential solution to the
++non-perfectly-nested loops' IVs issue described above. In particular, for a
++non-perfectly nested loop, one middle-ground proposal/solution would be to:
++* Emit the loop's IV as shared/mapped just like we do currently.
++* Emit a warning that the IV of the loop is emitted as shared/mapped.
++* Given support for `LOCAL`, we can recommend that the user explicitly
++  localize/privatize the loop's IV if they choose to.
++
++### More advanced detection of loop nests
++
++As pointed out earlier, any intervening code between the headers of 2 nested
++`do concurrent` loops currently prevents us from detecting them as a loop nest.
++In some cases this is overly conservative. Therefore, a more flexible loop-nest
++detection logic needs to be implemented.
++
++### Data-dependence analysis
++
++Right now, we map loop nests without analysing whether such a mapping is safe
++or not. We probably need to at least warn the user about unsafe loop nests due
++to loop-carried dependencies.
++
++### Non-rectangular loop nests
++
++So far, we have not needed to use the pass for non-rectangular loop nests. For
++example:
++```fortran
++do concurrent(i=1:n)
++  do concurrent(j=i:n)
++    ...
++  end do
++end do
++```
++We defer this to the (hopefully) near future when we get the conversion in a
++good shape for the samples/projects at hand.
+diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Decimal/binary-floating-point.h llvm-project-aso/flang/include/flang/Decimal/binary-floating-point.h
+--- llvm-project-aso-orig/flang/include/flang/Decimal/binary-floating-point.h	2024-08-27 20:36:25.176173639 -0500
++++ llvm-project-aso/flang/include/flang/Decimal/binary-floating-point.h	2024-11-23 20:39:47.176175380 -0600
+@@ -32,6 +32,7 @@
+ 
+ template class BinaryFloatingPointNumber {
+ public:
++  RT_OFFLOAD_VAR_GROUP_BEGIN
+   static constexpr common::RealCharacteristics realChars{BINARY_PRECISION};
+   static constexpr int binaryPrecision{BINARY_PRECISION};
+   static constexpr int bits{realChars.bits};
+@@ -47,7 +48,6 @@
+ 
+   using RawType = common::HostUnsignedIntType;
+   static_assert(CHAR_BIT * sizeof(RawType) >= bits);
+-  RT_OFFLOAD_VAR_GROUP_BEGIN
+   static constexpr RawType significandMask{(RawType{1} << significandBits) - 1};
+ 
+   constexpr RT_API_ATTRS BinaryFloatingPointNumber() {} // zero
+diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Frontend/CodeGenOptions.def llvm-project-aso/flang/include/flang/Frontend/CodeGenOptions.def
+--- llvm-project-aso-orig/flang/include/flang/Frontend/CodeGenOptions.def	2024-08-27 20:36:25.176173639 -0500
++++ llvm-project-aso/flang/include/flang/Frontend/CodeGenOptions.def	2024-11-23 20:39:47.176175380 -0600
+@@ -35,10 +35,13 @@
+ CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass
+ 
+ CODEGENOPT(Underscoring, 1, 1)
++CODEGENOPT(OffloadGlobalFiltering, 1, 1)
+ ENUM_CODEGENOPT(RelocationModel, llvm::Reloc::Model, 3, llvm::Reloc::PIC_) ///< Name of the relocation model to use.
+ ENUM_CODEGENOPT(DebugInfo, llvm::codegenoptions::DebugInfoKind, 4, llvm::codegenoptions::NoDebugInfo) ///< Level of debug info to generate ENUM_CODEGENOPT(VecLib, llvm::driver::VectorLibrary, 3, llvm::driver::VectorLibrary::NoLibrary) ///< Vector functions library to use ENUM_CODEGENOPT(FramePointer, llvm::FramePointerKind, 2, llvm::FramePointerKind::None) ///< Enable the usage of frame pointers @@ -3526,32 +3935,32 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Frontend/Cod + #undef CODEGENOPT #undef ENUM_CODEGENOPT -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Frontend/CodeGenOptions.h llvm-project/flang/include/flang/Frontend/CodeGenOptions.h ---- llvm-project.orig/flang/include/flang/Frontend/CodeGenOptions.h 2024-06-12 10:43:12.596210747 -0500 -+++ llvm-project/flang/include/flang/Frontend/CodeGenOptions.h 2024-06-12 10:44:09.347614281 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Frontend/CodeGenOptions.h llvm-project-aso/flang/include/flang/Frontend/CodeGenOptions.h +--- llvm-project-aso-orig/flang/include/flang/Frontend/CodeGenOptions.h 2024-10-18 17:40:32.468992659 -0500 ++++ llvm-project-aso/flang/include/flang/Frontend/CodeGenOptions.h 2024-11-23 20:39:47.176175380 -0600 @@ -15,6 +15,7 @@ #ifndef FORTRAN_FRONTEND_CODEGENOPTIONS_H #define FORTRAN_FRONTEND_CODEGENOPTIONS_H -+#include "flang/Optimizer/Transforms/Utils.h" ++#include "flang/Optimizer/OpenMP/Utils.h" #include "llvm/Frontend/Debug/Options.h" #include "llvm/Frontend/Driver/CodeGenOptions.h" #include "llvm/Support/CodeGen.h" -@@ -129,6 +130,10 @@ - /// transformation. - OptRemark OptimizationRemarkAnalysis; +@@ -143,6 +144,10 @@ + /// (-mlarge-data-threshold). + uint64_t LargeDataThreshold; + /// Optionally map `do concurrent` loops to OpenMP. This is only valid of + /// OpenMP is enabled. -+ using DoConcurrentMappingKind = fir::omp::DoConcurrentMappingKind; ++ using DoConcurrentMappingKind = flangomp::DoConcurrentMappingKind; + // Define accessors/mutators for code generation options of enumeration type. #define CODEGENOPT(Name, Bits, Default) #define ENUM_CODEGENOPT(Name, Type, Bits, Default) \ -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP/Clauses.h llvm-project/flang/include/flang/Lower/OpenMP/Clauses.h ---- llvm-project.orig/flang/include/flang/Lower/OpenMP/Clauses.h 1969-12-31 18:00:00.000000000 -0600 -+++ llvm-project/flang/include/flang/Lower/OpenMP/Clauses.h 2024-06-12 10:44:09.347614281 -0500 -@@ -0,0 +1,312 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Lower/OpenMP/Clauses.h llvm-project-aso/flang/include/flang/Lower/OpenMP/Clauses.h +--- llvm-project-aso-orig/flang/include/flang/Lower/OpenMP/Clauses.h 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/include/flang/Lower/OpenMP/Clauses.h 2024-11-23 20:39:47.176175380 -0600 +@@ -0,0 +1,330 @@ +//===-- Clauses.h -- OpenMP clause handling -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
@@ -3563,6 +3972,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP +#define FORTRAN_LOWER_OPENMP_CLAUSES_H + +#include "flang/Evaluate/expression.h" ++#include "flang/Evaluate/type.h" +#include "flang/Parser/parse-tree.h" +#include "flang/Semantics/expression.h" +#include "flang/Semantics/semantics.h" @@ -3583,12 +3993,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP +using namespace Fortran; +using SomeExpr = semantics::SomeExpr; +using MaybeExpr = semantics::MaybeExpr; -+ -+// evaluate::SomeType doesn't provide == operation. It's not really used in -+// flang's clauses so far, so a trivial implementation is sufficient. -+struct TypeTy : public evaluate::SomeType { -+ bool operator==(const TypeTy &t) const { return true; } -+}; ++using TypeTy = evaluate::DynamicType; + +template +struct IdTyTemplate { @@ -3609,6 +4014,13 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP + return designator == other.designator; + } + ++ // Defining an "ordering" which allows types derived from this to be ++ // utilised in maps and other containers that require comparison ++ // operators for ordering ++ bool operator<(const IdTyTemplate &other) const { ++ return symbol < other.symbol; ++ } ++ + operator bool() const { return symbol != nullptr; } +}; + @@ -3630,6 +4042,10 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP + Fortran::semantics::Symbol *sym() const { return identity.symbol; } + const std::optional &ref() const { return identity.designator; } + ++ bool operator<(const ObjectT &other) const { ++ return identity < other.identity; ++ } ++ + IdTy identity; +}; +} // namespace tomp::type @@ -3704,15 +4120,20 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP + semantics::SemanticsContext &semaCtx); + +namespace clause { ++using Range = tomp::type::RangeT; ++using Iterator = tomp::type::IteratorT; ++using IteratorSpecifier = tomp::type::IteratorSpecifierT; +using DefinedOperator = tomp::type::DefinedOperatorT; +using ProcedureDesignator = tomp::type::ProcedureDesignatorT; +using ReductionOperator = tomp::type::ReductionIdentifierT; ++using DependenceType = tomp::type::DependenceType; + +// "Requires" clauses are handled early on, and the aggregated information +// is stored in the Symbol details of modules, programs, and subprograms. +// These clauses are still handled here to cover all alternatives in the +// main clause variant. 
+ ++using Absent = tomp::clause::AbsentT; +using AcqRel = tomp::clause::AcqRelT; +using Acquire = tomp::clause::AcquireT; +using AdjustArgs = tomp::clause::AdjustArgsT; @@ -3729,6 +4150,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP +using Capture = tomp::clause::CaptureT; +using Collapse = tomp::clause::CollapseT; +using Compare = tomp::clause::CompareT; ++using Contains = tomp::clause::ContainsT; +using Copyin = tomp::clause::CopyinT; +using Copyprivate = tomp::clause::CopyprivateT; +using Defaultmap = tomp::clause::DefaultmapT; @@ -3753,6 +4175,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP +using Grainsize = tomp::clause::GrainsizeT; +using HasDeviceAddr = tomp::clause::HasDeviceAddrT; +using Hint = tomp::clause::HintT; ++using Holds = tomp::clause::HoldsT; +using If = tomp::clause::IfT; +using Inbranch = tomp::clause::InbranchT; +using Inclusive = tomp::clause::InclusiveT; @@ -3767,6 +4190,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP +using Match = tomp::clause::MatchT; +using Mergeable = tomp::clause::MergeableT; +using Message = tomp::clause::MessageT; ++using NoOpenmp = tomp::clause::NoOpenmpT; ++using NoOpenmpRoutines = tomp::clause::NoOpenmpRoutinesT; ++using NoParallelism = tomp::clause::NoParallelismT; +using Nocontext = tomp::clause::NocontextT; +using Nogroup = tomp::clause::NogroupT; +using Nontemporal = tomp::clause::NontemporalT; @@ -3798,6 +4224,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP +using Simdlen = tomp::clause::SimdlenT; +using Simd = tomp::clause::SimdT; +using Sizes = tomp::clause::SizesT; ++using Permutation = tomp::clause::PermutationT; +using TaskReduction = tomp::clause::TaskReductionT; +using ThreadLimit = tomp::clause::ThreadLimitT; +using Threads = tomp::clause::ThreadsT; @@ -3864,10 +4291,10 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP +} // namespace Fortran::lower::omp + +#endif // FORTRAN_LOWER_OPENMP_CLAUSES_H -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP/Utils.h llvm-project/flang/include/flang/Lower/OpenMP/Utils.h ---- llvm-project.orig/flang/include/flang/Lower/OpenMP/Utils.h 1969-12-31 18:00:00.000000000 -0600 -+++ llvm-project/flang/include/flang/Lower/OpenMP/Utils.h 2024-06-12 10:44:09.347614281 -0500 -@@ -0,0 +1,116 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Lower/OpenMP/Utils.h llvm-project-aso/flang/include/flang/Lower/OpenMP/Utils.h +--- llvm-project-aso-orig/flang/include/flang/Lower/OpenMP/Utils.h 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/include/flang/Lower/OpenMP/Utils.h 2024-11-23 20:39:47.176175380 -0600 +@@ -0,0 +1,169 @@ +//===-- Lower/OpenMP/Utils.h ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
@@ -3884,6 +4311,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP +#include "mlir/IR/Location.h" +#include "mlir/IR/Value.h" +#include "llvm/Support/CommandLine.h" ++#include + +extern llvm::cl::opt treatIndexAsSection; +extern llvm::cl::opt enableDelayedPrivatization; @@ -3904,6 +4332,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP +} // namespace parser + +namespace lower { ++class StatementContext; +namespace pft { +struct Evaluation; +} @@ -3919,38 +4348,97 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP +// and index data when lowering OpenMP map clauses. Keeps track of the +// placement of the component in the derived type hierarchy it rests within, +// alongside the generated mlir::omp::MapInfoOp for the mapped component. -+struct OmpMapMemberIndicesData { ++// ++// As an example of what the contents of this data structure may be like, ++// when provided the following derived type and map of that type: ++// ++// type :: bottom_layer ++// real(8) :: i2 ++// real(4) :: array_i2(10) ++// real(4) :: array_j2(10) ++// end type bottom_layer ++// ++// type :: top_layer ++// real(4) :: i ++// integer(4) :: array_i(10) ++// real(4) :: j ++// type(bottom_layer) :: nested ++// integer, allocatable :: array_j(:) ++// integer(4) :: k ++// end type top_layer ++// ++// type(top_layer) :: top_dtype ++// ++// map(tofrom: top_dtype%nested%i2, top_dtype%k, top_dtype%nested%array_i2) ++// ++// We would end up with an OmpMapParentAndMemberData populated like below: ++// ++// memberPlacementIndices: ++// Vector 1: 3, 0 ++// Vector 2: 5 ++// Vector 3: 3, 1 ++// ++// memberMap: ++// Entry 1: omp.map.info for "top_dtype%nested%i2" ++// Entry 2: omp.map.info for "top_dtype%k" ++// Entry 3: omp.map.info for "top_dtype%nested%array_i2" ++// ++// And this OmpMapParentAndMemberData would be accessed via the parent ++// symbol for top_dtype. Other parent derived type instances that have ++// members mapped would have there own OmpMapParentAndMemberData entry ++// accessed via their own symbol. ++struct OmpMapParentAndMemberData { + // The indices representing the component members placement in its derived + // type parents hierarchy. -+ llvm::SmallVector memberPlacementIndices; ++ llvm::SmallVector> memberPlacementIndices; + + // Placement of the member in the member vector. 
-+ mlir::omp::MapInfoOp memberMap; ++ llvm::SmallVector memberMap; ++ ++ bool isDuplicateMemberMapInfo(llvm::SmallVectorImpl &memberIndices) { ++ return llvm::find_if(memberPlacementIndices, [&](auto &memberData) { ++ return llvm::equal(memberIndices, memberData); ++ }) != memberPlacementIndices.end(); ++ } ++ ++ void addChildIndexAndMapToParent(const omp::Object &object, ++ mlir::omp::MapInfoOp &mapOp, ++ semantics::SemanticsContext &semaCtx); +}; + +mlir::omp::MapInfoOp -+createMapInfoOp(mlir::OpBuilder &builder, mlir::Location loc, -+ mlir::Value baseAddr, mlir::Value varPtrPtr, std::string name, -+ mlir::ArrayRef bounds, -+ mlir::ArrayRef members, -+ mlir::DenseIntElementsAttr membersIndex, uint64_t mapType, ++createMapInfoOp(fir::FirOpBuilder &builder, mlir::Location loc, ++ mlir::Value baseAddr, mlir::Value varPtrPtr, ++ llvm::StringRef name, llvm::ArrayRef bounds, ++ llvm::ArrayRef members, ++ mlir::ArrayAttr membersIndex, uint64_t mapType, + mlir::omp::VariableCaptureKind mapCaptureType, mlir::Type retTy, + bool partialMap = false); + -+void addChildIndexAndMapToParent( -+ const omp::Object &object, -+ std::map> &parentMemberIndices, -+ mlir::omp::MapInfoOp &mapOp, semantics::SemanticsContext &semaCtx); -+ +void insertChildMapInfoIntoParent( -+ lower::AbstractConverter &converter, -+ std::map> &parentMemberIndices, ++ Fortran::lower::AbstractConverter &converter, ++ Fortran::semantics::SemanticsContext &semaCtx, ++ Fortran::lower::StatementContext &stmtCtx, ++ std::map &parentMemberIndices, + llvm::SmallVectorImpl &mapOperands, -+ llvm::SmallVectorImpl &mapSyms, -+ llvm::SmallVectorImpl *mapSymTypes, -+ llvm::SmallVectorImpl *mapSymLocs); ++ llvm::SmallVectorImpl &mapSyms); ++ ++void generateMemberPlacementIndices( ++ const Object &object, llvm::SmallVectorImpl &indices, ++ Fortran::semantics::SemanticsContext &semaCtx); ++ ++bool isMemberOrParentAllocatableOrPointer( ++ const Object &object, Fortran::semantics::SemanticsContext &semaCtx); ++ ++mlir::Value createParentSymAndGenIntermediateMaps( ++ mlir::Location clauseLocation, Fortran::lower::AbstractConverter &converter, ++ semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, ++ omp::ObjectList &objectList, llvm::SmallVectorImpl &indices, ++ OmpMapParentAndMemberData &parentMemberIndices, llvm::StringRef asFortran, ++ llvm::omp::OpenMPOffloadMappingFlags mapTypeBits); ++ ++omp::ObjectList gatherObjectsOf(omp::Object derivedTypeMember, ++ semantics::SemanticsContext &semaCtx); + +mlir::Type getLoopVarType(lower::AbstractConverter &converter, + std::size_t loopVarTypeSize); @@ -3964,74 +4452,75 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Lower/OpenMP + +int64_t getCollapseValue(const List &clauses); + -+semantics::Symbol *getOmpObjectSymbol(const parser::OmpObject &ompObject); -+ +void genObjectList(const ObjectList &objects, + lower::AbstractConverter &converter, + llvm::SmallVectorImpl &operands); + -+// TODO: consider moving this to the `omp.loop_nest` op. 
Would be something like -+// this: -+// -+// ``` -+// mlir::Value LoopNestOp::calculateTripCount(mlir::OpBuilder &builder, -+// mlir::OpBuilder::InsertPoint ip) -+// ``` -+mlir::Value calculateTripCount(fir::FirOpBuilder &builder, mlir::Location loc, -+ const mlir::omp::CollapseClauseOps &ops); ++void lastprivateModifierNotSupported(const omp::clause::Lastprivate &lastp, ++ mlir::Location loc); ++ +} // namespace omp +} // namespace lower +} // namespace Fortran + +#endif // FORTRAN_LOWER_OPENMPUTILS_H -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Optimizer/Transforms/Passes.h llvm-project/flang/include/flang/Optimizer/Transforms/Passes.h ---- llvm-project.orig/flang/include/flang/Optimizer/Transforms/Passes.h 2024-06-12 10:43:12.604210663 -0500 -+++ llvm-project/flang/include/flang/Optimizer/Transforms/Passes.h 2024-06-12 10:44:09.347614281 -0500 -@@ -10,10 +10,12 @@ - #define FORTRAN_OPTIMIZER_TRANSFORMS_PASSES_H - - #include "flang/Optimizer/Dialect/FIROps.h" -+#include "flang/Optimizer/Transforms/Utils.h" - #include "mlir/Dialect/LLVMIR/LLVMAttrs.h" - #include "mlir/Dialect/OpenMP/OpenMPDialect.h" +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h llvm-project-aso/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h +--- llvm-project-aso-orig/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h 2024-11-14 15:28:41.122642523 -0600 ++++ llvm-project-aso/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h 2024-11-23 20:39:47.176175380 -0600 +@@ -67,7 +67,7 @@ + // end subroutine + // ------------------------------------------------- + // +- // flang -fc1 -emit-fir test.f90 -o test.fir ++ // flang-new -fc1 -emit-fir test.f90 -o test.fir + // + // ------------------- test.fir -------------------- + // fir.global @_QMtopEa : !fir.box>> +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Optimizer/OpenMP/Passes.h llvm-project-aso/flang/include/flang/Optimizer/OpenMP/Passes.h +--- llvm-project-aso-orig/flang/include/flang/Optimizer/OpenMP/Passes.h 2024-11-23 20:25:26.831275207 -0600 ++++ llvm-project-aso/flang/include/flang/Optimizer/OpenMP/Passes.h 2024-11-23 20:39:47.176175380 -0600 +@@ -13,6 +13,7 @@ + #ifndef FORTRAN_OPTIMIZER_OPENMP_PASSES_H + #define FORTRAN_OPTIMIZER_OPENMP_PASSES_H + ++#include "flang/Optimizer/OpenMP/Utils.h" + #include "mlir/Dialect/Func/IR/FuncOps.h" + #include "mlir/IR/BuiltinOps.h" #include "mlir/Pass/Pass.h" - #include "mlir/Pass/PassRegistry.h" -+ +@@ -21,6 +22,9 @@ #include - namespace mlir { -@@ -39,6 +41,7 @@ - #define GEN_PASS_DECL_ASSUMEDRANKOPCONVERSION - #define GEN_PASS_DECL_CHARACTERCONVERSION - #define GEN_PASS_DECL_CFGCONVERSION -+#define GEN_PASS_DECL_DOCONCURRENTCONVERSIONPASS - #define GEN_PASS_DECL_EXTERNALNAMECONVERSION - #define GEN_PASS_DECL_MEMREFDATAFLOWOPT - #define GEN_PASS_DECL_SIMPLIFYINTRINSICS -@@ -76,6 +79,8 @@ - std::unique_ptr - createVScaleAttrPass(std::pair vscaleAttr); - + namespace flangomp { ++ +std::unique_ptr createDoConcurrentConversionPass(bool mapToDevice); + - void populateCfgConversionRewrites(mlir::RewritePatternSet &patterns, - bool forceLoopToExecuteOnce = false, - bool setNSW = false); -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Optimizer/Transforms/Passes.td llvm-project/flang/include/flang/Optimizer/Transforms/Passes.td ---- llvm-project.orig/flang/include/flang/Optimizer/Transforms/Passes.td 2024-06-12 10:43:12.604210663 -0500 -+++ 
llvm-project/flang/include/flang/Optimizer/Transforms/Passes.td 2024-06-12 10:44:09.347614281 -0500 -@@ -15,6 +15,7 @@ - #define FLANG_OPTIMIZER_TRANSFORMS_PASSES + #define GEN_PASS_DECL + #define GEN_PASS_REGISTRATION + #include "flang/Optimizer/OpenMP/Passes.h.inc" +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Optimizer/OpenMP/Passes.td llvm-project-aso/flang/include/flang/Optimizer/OpenMP/Passes.td +--- llvm-project-aso-orig/flang/include/flang/Optimizer/OpenMP/Passes.td 2024-11-23 20:25:26.831275207 -0600 ++++ llvm-project-aso/flang/include/flang/Optimizer/OpenMP/Passes.td 2024-11-23 20:39:47.176175380 -0600 +@@ -10,6 +10,7 @@ + #define FORTRAN_OPTIMIZER_OPENMP_PASSES include "mlir/Pass/PassBase.td" +include "mlir/IR/EnumAttr.td" - def AbstractResultOpt - : Pass<"abstract-result"> { -@@ -408,4 +409,35 @@ + def MapInfoFinalizationPass + : Pass<"omp-map-info-finalization", "mlir::ModuleOp"> { +@@ -50,6 +51,46 @@ ]; } ++def GlobalFilteringPass : Pass<"omp-global-filtering"> { ++ let summary = "Filters out globals intended for the host when compiling " ++ "for the target device."; ++ let dependentDialects = [ ++ "mlir::func::FuncDialect", ++ "fir::FIROpsDialect" ++ ]; ++} ++ +def DoConcurrentConversionPass : Pass<"fopenmp-do-concurrent-conversion", "mlir::func::FuncOp"> { + let summary = "Map `DO CONCURRENT` loops to OpenMP worksharing loops."; + @@ -4049,87 +4538,278 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Optimizer/Tr + + let options = [ + Option<"mapTo", "map-to", -+ "fir::omp::DoConcurrentMappingKind", -+ /*default=*/"fir::omp::DoConcurrentMappingKind::DCMK_None", ++ "flangomp::DoConcurrentMappingKind", ++ /*default=*/"flangomp::DoConcurrentMappingKind::DCMK_None", + "Try to map `do concurrent` loops to OpenMP (on host or device)", + [{::llvm::cl::values( -+ clEnumValN(fir::omp::DoConcurrentMappingKind::DCMK_None, ++ clEnumValN(flangomp::DoConcurrentMappingKind::DCMK_None, + "none", "Do not lower `do concurrent` to OpenMP"), -+ clEnumValN(fir::omp::DoConcurrentMappingKind::DCMK_Host, ++ clEnumValN(flangomp::DoConcurrentMappingKind::DCMK_Host, + "host", "Lower to run in parallel on the CPU"), -+ clEnumValN(fir::omp::DoConcurrentMappingKind::DCMK_Device, ++ clEnumValN(flangomp::DoConcurrentMappingKind::DCMK_Device, + "device", "Lower to run in parallel on the GPU") + )}]>, + ]; +} + - #endif // FLANG_OPTIMIZER_TRANSFORMS_PASSES -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Optimizer/Transforms/Utils.h llvm-project/flang/include/flang/Optimizer/Transforms/Utils.h ---- llvm-project.orig/flang/include/flang/Optimizer/Transforms/Utils.h 2024-06-12 10:43:12.604210663 -0500 -+++ llvm-project/flang/include/flang/Optimizer/Transforms/Utils.h 2024-06-12 10:44:09.347614281 -0500 -@@ -13,8 +13,13 @@ - #ifndef FORTRAN_OPTIMIZER_TRANSFORMS_UTILS_H - #define FORTRAN_OPTIMIZER_TRANSFORMS_UTILS_H - -+#include "mlir/IR/Location.h" -+#include "mlir/IR/Value.h" -+ - namespace fir { - -+class FirOpBuilder; + // Needs to be scheduled on Module as we create functions in it + def LowerWorkshare : Pass<"lower-workshare", "::mlir::ModuleOp"> { + let summary = "Lower workshare construct"; +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Optimizer/OpenMP/Utils.h llvm-project-aso/flang/include/flang/Optimizer/OpenMP/Utils.h +--- llvm-project-aso-orig/flang/include/flang/Optimizer/OpenMP/Utils.h 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/include/flang/Optimizer/OpenMP/Utils.h 2024-11-23 
20:39:47.176175380 -0600 +@@ -0,0 +1,26 @@ ++//===-- Optimizer/OpenMP/Utils.h --------------------------------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/ ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef FORTRAN_OPTIMIZER_OPENMP_UTILS_H ++#define FORTRAN_OPTIMIZER_OPENMP_UTILS_H ++ ++namespace flangomp { + - using MinlocBodyOpGeneratorTy = llvm::function_ref &)>; -@@ -33,6 +38,13 @@ - mlir::Type maskElemType, mlir::Value resultArr, - bool maskMayBeLogicalScalar); - -+namespace omp { +enum class DoConcurrentMappingKind { -+ DCMK_None, // Do not lower `do concurrent` to OpenMP. -+ DCMK_Host, // Lower to run in parallel on the CPU. -+ DCMK_Device // Lower to run in parallel on the GPU. ++ DCMK_None, ///< Do not lower `do concurrent` to OpenMP. ++ DCMK_Host, ///< Lower to run in parallel on the CPU. ++ DCMK_Device ///< Lower to run in parallel on the GPU. +}; -+} - } // namespace fir - - #endif // FORTRAN_OPTIMIZER_TRANSFORMS_UTILS_H -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/include/flang/Tools/CLOptions.inc llvm-project/flang/include/flang/Tools/CLOptions.inc ---- llvm-project.orig/flang/include/flang/Tools/CLOptions.inc 2024-06-12 10:43:12.608210621 -0500 -+++ llvm-project/flang/include/flang/Tools/CLOptions.inc 2024-06-12 10:44:09.347614281 -0500 -@@ -332,6 +332,9 @@ - pm.addPass(hlfir::createConvertHLFIRtoFIR()); - } ++ ++} // namespace flangomp ++ ++#endif // FORTRAN_OPTIMIZER_OPENMP_UTILS_H +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Optimizer/Passes/Pipelines.h llvm-project-aso/flang/include/flang/Optimizer/Passes/Pipelines.h +--- llvm-project-aso-orig/flang/include/flang/Optimizer/Passes/Pipelines.h 2024-11-23 20:25:26.831275207 -0600 ++++ llvm-project-aso/flang/include/flang/Optimizer/Passes/Pipelines.h 2024-11-23 20:39:47.176175380 -0600 +@@ -126,6 +126,15 @@ + mlir::PassManager &pm, bool enableOpenMP, + llvm::OptimizationLevel optLevel = defaultOptLevel); +using DoConcurrentMappingKind = + Fortran::frontend::CodeGenOptions::DoConcurrentMappingKind; ++ ++struct OpenMPFIRPassPipelineOpts { ++ bool isTargetDevice; ++ bool enableOffloadGlobalFiltering; ++ DoConcurrentMappingKind doConcurrentMappingKind; ++}; + /// Create a pass pipeline for handling certain OpenMP transformations needed /// prior to FIR lowering. /// -@@ -341,10 +344,15 @@ +@@ -135,7 +144,8 @@ /// \param pm - MLIR pass manager that will hold the pipeline definition. /// \param isTargetDevice - Whether code is being generated for a target device /// rather than the host device. 
--inline void createOpenMPFIRPassPipeline( -- mlir::PassManager &pm, bool isTargetDevice) { -+inline void createOpenMPFIRPassPipeline(mlir::PassManager &pm, -+ bool isTargetDevice, DoConcurrentMappingKind doConcurrentMappingKind) { -+ if (doConcurrentMappingKind != DoConcurrentMappingKind::DCMK_None) -+ pm.addPass(fir::createDoConcurrentConversionPass( -+ doConcurrentMappingKind == DoConcurrentMappingKind::DCMK_Device)); -+ - addNestedPassToAllTopLevelOperations( - pm, fir::createOMPMapInfoFinalizationPass); -+ - pm.addPass(fir::createOMPMarkDeclareTargetPass()); - if (isTargetDevice) - pm.addPass(fir::createOMPFunctionFiltering()); -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Frontend/CompilerInvocation.cpp llvm-project/flang/lib/Frontend/CompilerInvocation.cpp ---- llvm-project.orig/flang/lib/Frontend/CompilerInvocation.cpp 2024-06-12 10:43:12.612210579 -0500 -+++ llvm-project/flang/lib/Frontend/CompilerInvocation.cpp 2024-06-12 10:44:09.347614281 -0500 -@@ -155,6 +155,32 @@ +-void createOpenMPFIRPassPipeline(mlir::PassManager &pm, bool isTargetDevice); ++void createOpenMPFIRPassPipeline(mlir::PassManager &pm, ++ OpenMPFIRPassPipelineOpts opts); + + #if !defined(FLANG_EXCLUDE_CODEGEN) + void createDebugPasses(mlir::PassManager &pm, +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Optimizer/Transforms/Passes.h llvm-project-aso/flang/include/flang/Optimizer/Transforms/Passes.h +--- llvm-project-aso-orig/flang/include/flang/Optimizer/Transforms/Passes.h 2024-10-29 11:07:19.325635688 -0500 ++++ llvm-project-aso/flang/include/flang/Optimizer/Transforms/Passes.h 2024-11-23 20:39:47.176175380 -0600 +@@ -10,10 +10,12 @@ + #define FORTRAN_OPTIMIZER_TRANSFORMS_PASSES_H + + #include "flang/Optimizer/Dialect/FIROps.h" ++#include "flang/Optimizer/Transforms/Utils.h" + #include "mlir/Dialect/LLVMIR/LLVMAttrs.h" + #include "mlir/Dialect/OpenMP/OpenMPDialect.h" + #include "mlir/Pass/Pass.h" + #include "mlir/Pass/PassRegistry.h" ++ + #include + + namespace mlir { +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Optimizer/Transforms/Utils.h llvm-project-aso/flang/include/flang/Optimizer/Transforms/Utils.h +--- llvm-project-aso-orig/flang/include/flang/Optimizer/Transforms/Utils.h 2024-08-27 20:36:25.188173519 -0500 ++++ llvm-project-aso/flang/include/flang/Optimizer/Transforms/Utils.h 2024-11-23 20:39:47.176175380 -0600 +@@ -13,8 +13,13 @@ + #ifndef FORTRAN_OPTIMIZER_TRANSFORMS_UTILS_H + #define FORTRAN_OPTIMIZER_TRANSFORMS_UTILS_H + ++#include "mlir/IR/Location.h" ++#include "mlir/IR/Value.h" ++ + namespace fir { + ++class FirOpBuilder; ++ + using MinlocBodyOpGeneratorTy = llvm::function_ref &)>; +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Runtime/allocator-registry.h llvm-project-aso/flang/include/flang/Runtime/allocator-registry.h +--- llvm-project-aso-orig/flang/include/flang/Runtime/allocator-registry.h 2024-10-18 17:40:32.476992577 -0500 ++++ llvm-project-aso/flang/include/flang/Runtime/allocator-registry.h 2024-11-23 20:39:47.176175380 -0600 +@@ -13,6 +13,8 @@ + #include + #include + ++RT_OFFLOAD_VAR_GROUP_BEGIN ++ + static constexpr unsigned kDefaultAllocator = 0; + + // Allocator used for CUF +@@ -21,6 +23,8 @@ + static constexpr unsigned kManagedAllocatorPos = 3; + static constexpr unsigned kUnifiedAllocatorPos = 4; + ++RT_OFFLOAD_VAR_GROUP_END ++ + #define MAX_ALLOCATOR 7 // 3 bits are reserved in the descriptor. 
+ + namespace Fortran::runtime { +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Runtime/freestanding-tools.h llvm-project-aso/flang/include/flang/Runtime/freestanding-tools.h +--- llvm-project-aso-orig/flang/include/flang/Runtime/freestanding-tools.h 2024-11-23 20:25:26.835275192 -0600 ++++ llvm-project-aso/flang/include/flang/Runtime/freestanding-tools.h 2024-11-23 20:39:47.176175380 -0600 +@@ -23,6 +23,16 @@ + #define STD_FILL_N_UNSUPPORTED 1 + #endif + ++#if !defined(STD_MEMSET_UNSUPPORTED) && \ ++ (defined(__CUDACC__) || defined(__CUDA__)) && defined(__CUDA_ARCH__) ++#define STD_MEMSET_UNSUPPORTED 1 ++#endif ++ ++#if !defined(STD_MEMCPY_UNSUPPORTED) && \ ++ (defined(__CUDACC__) || defined(__CUDA__)) && defined(__CUDA_ARCH__) ++#define STD_MEMCPY_UNSUPPORTED 1 ++#endif ++ + #if !defined(STD_MEMMOVE_UNSUPPORTED) && \ + (defined(__CUDACC__) || defined(__CUDA__)) && defined(__CUDA_ARCH__) + #define STD_MEMMOVE_UNSUPPORTED 1 +@@ -63,6 +73,25 @@ + #define STD_TOUPPER_UNSUPPORTED 1 + #endif + ++#if defined(OMP_OFFLOAD_BUILD) || defined(OMP_NOHOST_BUILD) ++// #pragma message "Using replacements for unsupported std functions" ++#define STD_FILL_N_UNSUPPORTED 1 ++#define STD_MEMSET_USE_BUILTIN 1 ++#define STD_MEMSET_UNSUPPORTED 1 ++#define STD_MEMCPY_USE_BUILTIN 1 ++#define STD_MEMCPY_UNSUPPORTED 1 ++// #define STD_MEMMOVE_USE_BUILTIN 1 // address now taken in assign.h ++#define STD_MEMMOVE_UNSUPPORTED 1 ++// #define STD_STRLEN_USE_BUILTIN 1 // still resolves to strlen ++#define STD_STRLEN_UNSUPPORTED 1 ++#define STD_MEMCMP_UNSUPPORTED 1 ++#define STD_REALLOC_UNSUPPORTED 1 ++#define STD_MEMCHR_UNSUPPORTED 1 ++#define STD_STRCPY_UNSUPPORTED 1 ++#define STD_STRCMP_UNSUPPORTED 1 ++#define STD_TOUPPER_UNSUPPORTED 1 ++#endif ++ + namespace Fortran::runtime { + + #if STD_FILL_N_UNSUPPORTED +@@ -79,7 +108,52 @@ + using std::fill_n; + #endif // !STD_FILL_N_UNSUPPORTED + +-#if STD_MEMMOVE_UNSUPPORTED ++#if STD_MEMSET_USE_BUILTIN ++static inline RT_API_ATTRS void memset( ++ void *dest, uint8_t value, std::size_t count) { ++ __builtin_memset(dest, value, count); ++} ++#elif STD_MEMSET_UNSUPPORTED ++static inline RT_API_ATTRS void memset( ++ void *dest, uint8_t value, std::size_t count) { ++ char *to{reinterpret_cast(dest)}; ++ while (count--) { ++ *to++ = value; ++ } ++ return; ++} ++#else ++using std::memset; ++#endif ++ ++#if STD_MEMCPY_USE_BUILTIN ++static inline RT_API_ATTRS void memcpy( ++ void *dest, const void *src, std::size_t count) { ++ __builtin_memcpy(dest, src, count); ++} ++#elif STD_MEMCPY_UNSUPPORTED ++static inline RT_API_ATTRS void memcpy( ++ void *dest, const void *src, std::size_t count) { ++ char *to{reinterpret_cast(dest)}; ++ const char *from{reinterpret_cast(src)}; ++ if (to == from) { ++ return; ++ } ++ while (count--) { ++ *to++ = *from++; ++ } ++ return; ++} ++#else ++using std::memcpy; ++#endif ++ ++#if STD_MEMMOVE_USE_BUILTIN ++static inline RT_API_ATTRS void memmove( ++ void *dest, const void *src, std::size_t count) { ++ __builtin_memmove(dest, src, count); ++} ++#elif STD_MEMMOVE_UNSUPPORTED + // Provides alternative implementation for std::memmove(), if + // it is not supported. 
+ static inline RT_API_ATTRS void *memmove( +@@ -88,10 +162,10 @@ + const char *from{reinterpret_cast(src)}; + + if (to == from) { +- return; ++ return dest; + } + if (to + count <= from || from + count <= to) { +- std::memcpy(dest, src, count); ++ memcpy(dest, src, count); + } else if (to < from) { + while (count--) { + *to++ = *from++; +@@ -118,7 +192,11 @@ + } + #endif + +-#if STD_STRLEN_UNSUPPORTED ++#if STD_STRLEN_USE_BUILTIN ++static inline RT_API_ATTRS std::size_t strlen(const char *str) { ++ return __builtin_strlen(str); ++} ++#elif STD_STRLEN_UNSUPPORTED + // Provides alternative implementation for std::strlen(), if + // it is not supported. + static inline RT_API_ATTRS std::size_t strlen(const char *str) { +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/include/flang/Tools/CrossToolHelpers.h llvm-project-aso/flang/include/flang/Tools/CrossToolHelpers.h +--- llvm-project-aso-orig/flang/include/flang/Tools/CrossToolHelpers.h 2024-11-23 20:25:26.835275192 -0600 ++++ llvm-project-aso/flang/include/flang/Tools/CrossToolHelpers.h 2024-11-23 20:39:47.176175380 -0600 +@@ -165,7 +165,7 @@ + bool OpenMPIsTargetDevice = false; + bool OpenMPIsGPU = false; + bool OpenMPForceUSM = false; +- uint32_t OpenMPVersion = 11; ++ uint32_t OpenMPVersion = 52; + std::string OMPHostIRFile = {}; + std::vector OMPTargetTriples = {}; + bool NoGPULib = false; +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Frontend/CompilerInvocation.cpp llvm-project-aso/flang/lib/Frontend/CompilerInvocation.cpp +--- llvm-project-aso-orig/flang/lib/Frontend/CompilerInvocation.cpp 2024-11-23 20:25:26.835275192 -0600 ++++ llvm-project-aso/flang/lib/Frontend/CompilerInvocation.cpp 2024-11-23 20:39:47.180175366 -0600 +@@ -157,6 +157,32 @@ return true; } @@ -4162,34 +4842,63 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Frontend/CompilerInvoc static bool parseVectorLibArg(Fortran::frontend::CodeGenOptions &opts, llvm::opt::ArgList &args, clang::DiagnosticsEngine &diags) { -@@ -386,6 +412,8 @@ +@@ -422,6 +448,13 @@ clang::driver::options::OPT_funderscoring, false)) { opts.Underscoring = 0; } + ++ if (args.hasFlag(clang::driver::options::OPT_fno_offload_global_filtering, ++ clang::driver::options::OPT_foffload_global_filtering, false)) { ++ opts.OffloadGlobalFiltering = 0; ++ } ++ + parseDoConcurrentMapping(opts, args, diags); } /// Parses all target input arguments and populates the target -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Frontend/FrontendActions.cpp llvm-project/flang/lib/Frontend/FrontendActions.cpp ---- llvm-project.orig/flang/lib/Frontend/FrontendActions.cpp 2024-06-12 10:43:12.612210579 -0500 -+++ llvm-project/flang/lib/Frontend/FrontendActions.cpp 2024-06-12 10:44:09.347614281 -0500 -@@ -320,16 +320,34 @@ +@@ -1014,8 +1047,8 @@ + unsigned numErrorsBefore = diags.getNumErrors(); + llvm::Triple t(res.getTargetOpts().triple); + +- // By default OpenMP is set to 1.1 version +- res.getLangOpts().OpenMPVersion = 11; ++ // By default OpenMP is set to 5.2 version ++ res.getLangOpts().OpenMPVersion = 52; + res.getFrontendOpts().features.Enable( + Fortran::common::LanguageFeature::OpenMP); + if (int Version = getLastArgIntValue( +@@ -1483,6 +1516,7 @@ + auto &fortranOptions = getFortranOpts(); + const auto &frontendOptions = getFrontendOpts(); + // Populate the macro list with version numbers and other predefinitions. 
++ fortranOptions.predefinitions.emplace_back("__amdflang__", "1"); + fortranOptions.predefinitions.emplace_back("__flang__", "1"); + fortranOptions.predefinitions.emplace_back("__flang_major__", + FLANG_VERSION_MAJOR_STRING); +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Frontend/FrontendActions.cpp llvm-project-aso/flang/lib/Frontend/FrontendActions.cpp +--- llvm-project-aso-orig/flang/lib/Frontend/FrontendActions.cpp 2024-11-23 20:25:26.835275192 -0600 ++++ llvm-project-aso/flang/lib/Frontend/FrontendActions.cpp 2024-11-23 20:39:47.180175366 -0600 +@@ -330,16 +330,38 @@ // Add OpenMP-related passes // WARNING: These passes must be run immediately after the lowering to ensure // that the FIR is correct with respect to OpenMP operations/attributes. - if (ci.getInvocation().getFrontendOpts().features.IsEnabled( - Fortran::common::LanguageFeature::OpenMP)) { +- bool isDevice = false; + bool isOpenMPEnabled = + ci.getInvocation().getFrontendOpts().features.IsEnabled( + Fortran::common::LanguageFeature::OpenMP); + ++ fir::OpenMPFIRPassPipelineOpts opts; ++ + using DoConcurrentMappingKind = + Fortran::frontend::CodeGenOptions::DoConcurrentMappingKind; -+ DoConcurrentMappingKind doConcurrentMappingKind = ++ opts.doConcurrentMappingKind = + ci.getInvocation().getCodeGenOpts().getDoConcurrentMapping(); ++ opts.enableOffloadGlobalFiltering = ++ ci.getInvocation().getCodeGenOpts().OffloadGlobalFiltering; + -+ if (doConcurrentMappingKind != DoConcurrentMappingKind::DCMK_None && ++ if (opts.doConcurrentMappingKind != DoConcurrentMappingKind::DCMK_None && + !isOpenMPEnabled) { + unsigned diagID = ci.getDiagnostics().getCustomDiagID( + clang::DiagnosticsEngine::Warning, @@ -4199,22 +4908,38 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Frontend/FrontendActio + } + + if (isOpenMPEnabled) { - bool isDevice = false; ++ opts.isTargetDevice = false; if (auto offloadMod = llvm::dyn_cast( mlirModule->getOperation())) - isDevice = offloadMod.getIsTargetDevice(); +- isDevice = offloadMod.getIsTargetDevice(); ++ opts.isTargetDevice = offloadMod.getIsTargetDevice(); + // WARNING: This pipeline must be run immediately after the lowering to // ensure that the FIR is correct with respect to OpenMP operations/ // attributes. - fir::createOpenMPFIRPassPipeline(pm, isDevice); -+ fir::createOpenMPFIRPassPipeline(pm, isDevice, doConcurrentMappingKind); ++ fir::createOpenMPFIRPassPipeline(pm, opts); } pm.enableVerifier(/*verifyPasses=*/true); -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/ClauseProcessor.cpp llvm-project/flang/lib/Lower/OpenMP/ClauseProcessor.cpp ---- llvm-project.orig/flang/lib/Lower/OpenMP/ClauseProcessor.cpp 2024-06-12 10:43:12.620210495 -0500 -+++ llvm-project/flang/lib/Lower/OpenMP/ClauseProcessor.cpp 2024-06-12 10:44:09.347614281 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Lower/DirectivesCommon.h llvm-project-aso/flang/lib/Lower/DirectivesCommon.h +--- llvm-project-aso-orig/flang/lib/Lower/DirectivesCommon.h 2024-11-06 08:35:35.199250155 -0600 ++++ llvm-project-aso/flang/lib/Lower/DirectivesCommon.h 2024-11-23 20:39:47.180175366 -0600 +@@ -984,7 +984,10 @@ + // If it is a scalar subscript, then the upper bound + // is equal to the lower bound, and the extent is one. 
+ ubound = lbound; +- extent = one; ++ if (treatIndexAsSection) ++ extent = fir::factory::readExtent(builder, loc, dataExv, dimension); ++ else ++ extent = one; + } else { + asFortran << ':'; + Fortran::semantics::MaybeExpr upper = +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Lower/OpenMP/ClauseProcessor.cpp llvm-project-aso/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +--- llvm-project-aso-orig/flang/lib/Lower/OpenMP/ClauseProcessor.cpp 2024-11-23 20:25:26.839275178 -0600 ++++ llvm-project-aso/flang/lib/Lower/OpenMP/ClauseProcessor.cpp 2024-11-23 20:39:47.180175366 -0600 @@ -11,8 +11,8 @@ //===----------------------------------------------------------------------===// @@ -4225,9 +4950,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/ClausePro #include "flang/Lower/PFTBuilder.h" #include "flang/Parser/tools.h" #include "flang/Semantics/tools.h" -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/ClauseProcessor.h llvm-project/flang/lib/Lower/OpenMP/ClauseProcessor.h ---- llvm-project.orig/flang/lib/Lower/OpenMP/ClauseProcessor.h 2024-06-12 10:43:12.620210495 -0500 -+++ llvm-project/flang/lib/Lower/OpenMP/ClauseProcessor.h 2024-06-12 10:44:09.347614281 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Lower/OpenMP/ClauseProcessor.h llvm-project-aso/flang/lib/Lower/OpenMP/ClauseProcessor.h +--- llvm-project-aso-orig/flang/lib/Lower/OpenMP/ClauseProcessor.h 2024-11-23 20:25:26.839275178 -0600 ++++ llvm-project-aso/flang/lib/Lower/OpenMP/ClauseProcessor.h 2024-11-23 20:39:47.180175366 -0600 @@ -12,12 +12,12 @@ #ifndef FORTRAN_LOWER_CLAUSEPROCESSOR_H #define FORTRAN_LOWER_CLAUSEPROCESSOR_H @@ -4243,9 +4968,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/ClausePro #include "flang/Optimizer/Builder/Todo.h" #include "flang/Parser/dump-parse-tree.h" #include "flang/Parser/parse-tree.h" -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Clauses.cpp llvm-project/flang/lib/Lower/OpenMP/Clauses.cpp ---- llvm-project.orig/flang/lib/Lower/OpenMP/Clauses.cpp 2024-06-12 10:43:12.620210495 -0500 -+++ llvm-project/flang/lib/Lower/OpenMP/Clauses.cpp 2024-06-12 10:44:09.347614281 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Lower/OpenMP/Clauses.cpp llvm-project-aso/flang/lib/Lower/OpenMP/Clauses.cpp +--- llvm-project-aso-orig/flang/lib/Lower/OpenMP/Clauses.cpp 2024-11-23 20:25:26.839275178 -0600 ++++ llvm-project-aso/flang/lib/Lower/OpenMP/Clauses.cpp 2024-11-23 20:39:47.180175366 -0600 @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// @@ -4255,335 +4980,20 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Clauses.c #include "flang/Common/idioms.h" #include "flang/Evaluate/expression.h" -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Clauses.h llvm-project/flang/lib/Lower/OpenMP/Clauses.h ---- llvm-project.orig/flang/lib/Lower/OpenMP/Clauses.h 2024-06-12 10:43:12.620210495 -0500 -+++ llvm-project/flang/lib/Lower/OpenMP/Clauses.h 1969-12-31 18:00:00.000000000 -0600 -@@ -1,312 +0,0 @@ --//===-- Clauses.h -- OpenMP clause handling -------------------------------===// --// --// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. --// See https://llvm.org/LICENSE.txt for license information. 
--// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception --// --//===----------------------------------------------------------------------===// --#ifndef FORTRAN_LOWER_OPENMP_CLAUSES_H --#define FORTRAN_LOWER_OPENMP_CLAUSES_H -- --#include "flang/Evaluate/expression.h" --#include "flang/Parser/parse-tree.h" --#include "flang/Semantics/expression.h" --#include "flang/Semantics/semantics.h" --#include "flang/Semantics/symbol.h" -- --#include "llvm/ADT/STLExtras.h" --#include "llvm/Frontend/OpenMP/ClauseT.h" -- --#include --#include --#include -- --namespace Fortran::semantics { --class Symbol; --} -- --namespace Fortran::lower::omp { --using namespace Fortran; --using SomeExpr = semantics::SomeExpr; --using MaybeExpr = semantics::MaybeExpr; -- --// evaluate::SomeType doesn't provide == operation. It's not really used in --// flang's clauses so far, so a trivial implementation is sufficient. --struct TypeTy : public evaluate::SomeType { -- bool operator==(const TypeTy &t) const { return true; } --}; -- --template --struct IdTyTemplate { -- // "symbol" is always non-null for id's of actual objects. -- Fortran::semantics::Symbol *symbol; -- std::optional designator; -- -- bool operator==(const IdTyTemplate &other) const { -- // If symbols are different, then the objects are different. -- if (symbol != other.symbol) -- return false; -- if (symbol == nullptr) -- return true; -- // Equal symbols don't necessarily indicate identical objects, -- // for example, a derived object component may use a single symbol, -- // which will refer to different objects for different designators, -- // e.g. a%c and b%c. -- return designator == other.designator; -- } -- -- operator bool() const { return symbol != nullptr; } --}; -- --using ExprTy = SomeExpr; -- --template --using List = tomp::ListT; --} // namespace Fortran::lower::omp -- --// Specialization of the ObjectT template --namespace tomp::type { --template <> --struct ObjectT, -- Fortran::lower::omp::ExprTy> { -- using IdTy = Fortran::lower::omp::IdTyTemplate; -- using ExprTy = Fortran::lower::omp::ExprTy; -- -- IdTy id() const { return identity; } -- Fortran::semantics::Symbol *sym() const { return identity.symbol; } -- const std::optional &ref() const { return identity.designator; } -- -- IdTy identity; --}; --} // namespace tomp::type -- --namespace Fortran::lower::omp { --using IdTy = IdTyTemplate; --} -- --namespace std { --template <> --struct hash { -- size_t operator()(const Fortran::lower::omp::IdTy &id) const { -- return static_cast(reinterpret_cast(id.symbol)); -- } --}; --} // namespace std -- --namespace Fortran::lower::omp { --using Object = tomp::ObjectT; --using ObjectList = tomp::ObjectListT; -- --Object makeObject(const parser::OmpObject &object, -- semantics::SemanticsContext &semaCtx); --Object makeObject(const parser::Name &name, -- semantics::SemanticsContext &semaCtx); --Object makeObject(const parser::Designator &dsg, -- semantics::SemanticsContext &semaCtx); --Object makeObject(const parser::StructureComponent &comp, -- semantics::SemanticsContext &semaCtx); -- --inline auto makeObjectFn(semantics::SemanticsContext &semaCtx) { -- return [&](auto &&s) { return makeObject(s, semaCtx); }; --} -- --template --SomeExpr makeExpr(T &&pftExpr, semantics::SemanticsContext &semaCtx) { -- auto maybeExpr = evaluate::ExpressionAnalyzer(semaCtx).Analyze(pftExpr); -- assert(maybeExpr); -- return std::move(*maybeExpr); --} -- --inline auto makeExprFn(semantics::SemanticsContext &semaCtx) { -- return [&](auto &&s) { return makeExpr(s, 
semaCtx); }; --} -- --template < -- typename ContainerTy, typename FunctionTy, -- typename ElemTy = typename llvm::remove_cvref_t::value_type, -- typename ResultTy = std::invoke_result_t> --List makeList(ContainerTy &&container, FunctionTy &&func) { -- List v; -- llvm::transform(container, std::back_inserter(v), func); -- return v; --} -- --inline ObjectList makeObjects(const parser::OmpObjectList &objects, -- semantics::SemanticsContext &semaCtx) { -- return makeList(objects.v, makeObjectFn(semaCtx)); --} -- --template > --std::optional maybeApply(FuncTy &&func, -- const std::optional &arg) { -- if (!arg) -- return std::nullopt; -- return std::move(func(*arg)); --} -- --std::optional getBaseObject(const Object &object, -- semantics::SemanticsContext &semaCtx); -- --namespace clause { --using DefinedOperator = tomp::type::DefinedOperatorT; --using ProcedureDesignator = tomp::type::ProcedureDesignatorT; --using ReductionOperator = tomp::type::ReductionIdentifierT; -- --// "Requires" clauses are handled early on, and the aggregated information --// is stored in the Symbol details of modules, programs, and subprograms. --// These clauses are still handled here to cover all alternatives in the --// main clause variant. -- --using AcqRel = tomp::clause::AcqRelT; --using Acquire = tomp::clause::AcquireT; --using AdjustArgs = tomp::clause::AdjustArgsT; --using Affinity = tomp::clause::AffinityT; --using Aligned = tomp::clause::AlignedT; --using Align = tomp::clause::AlignT; --using Allocate = tomp::clause::AllocateT; --using Allocator = tomp::clause::AllocatorT; --using AppendArgs = tomp::clause::AppendArgsT; --using AtomicDefaultMemOrder = -- tomp::clause::AtomicDefaultMemOrderT; --using At = tomp::clause::AtT; --using Bind = tomp::clause::BindT; --using Capture = tomp::clause::CaptureT; --using Collapse = tomp::clause::CollapseT; --using Compare = tomp::clause::CompareT; --using Copyin = tomp::clause::CopyinT; --using Copyprivate = tomp::clause::CopyprivateT; --using Defaultmap = tomp::clause::DefaultmapT; --using Default = tomp::clause::DefaultT; --using Depend = tomp::clause::DependT; --using Destroy = tomp::clause::DestroyT; --using Detach = tomp::clause::DetachT; --using Device = tomp::clause::DeviceT; --using DeviceType = tomp::clause::DeviceTypeT; --using DistSchedule = tomp::clause::DistScheduleT; --using Doacross = tomp::clause::DoacrossT; --using DynamicAllocators = -- tomp::clause::DynamicAllocatorsT; --using Enter = tomp::clause::EnterT; --using Exclusive = tomp::clause::ExclusiveT; --using Fail = tomp::clause::FailT; --using Filter = tomp::clause::FilterT; --using Final = tomp::clause::FinalT; --using Firstprivate = tomp::clause::FirstprivateT; --using From = tomp::clause::FromT; --using Full = tomp::clause::FullT; --using Grainsize = tomp::clause::GrainsizeT; --using HasDeviceAddr = tomp::clause::HasDeviceAddrT; --using Hint = tomp::clause::HintT; --using If = tomp::clause::IfT; --using Inbranch = tomp::clause::InbranchT; --using Inclusive = tomp::clause::InclusiveT; --using Indirect = tomp::clause::IndirectT; --using Init = tomp::clause::InitT; --using InReduction = tomp::clause::InReductionT; --using IsDevicePtr = tomp::clause::IsDevicePtrT; --using Lastprivate = tomp::clause::LastprivateT; --using Linear = tomp::clause::LinearT; --using Link = tomp::clause::LinkT; --using Map = tomp::clause::MapT; --using Match = tomp::clause::MatchT; --using Mergeable = tomp::clause::MergeableT; --using Message = tomp::clause::MessageT; --using Nocontext = tomp::clause::NocontextT; --using 
Nogroup = tomp::clause::NogroupT; --using Nontemporal = tomp::clause::NontemporalT; --using Notinbranch = tomp::clause::NotinbranchT; --using Novariants = tomp::clause::NovariantsT; --using Nowait = tomp::clause::NowaitT; --using NumTasks = tomp::clause::NumTasksT; --using NumTeams = tomp::clause::NumTeamsT; --using NumThreads = tomp::clause::NumThreadsT; --using OmpxAttribute = tomp::clause::OmpxAttributeT; --using OmpxBare = tomp::clause::OmpxBareT; --using OmpxDynCgroupMem = tomp::clause::OmpxDynCgroupMemT; --using Ordered = tomp::clause::OrderedT; --using Order = tomp::clause::OrderT; --using Partial = tomp::clause::PartialT; --using Priority = tomp::clause::PriorityT; --using Private = tomp::clause::PrivateT; --using ProcBind = tomp::clause::ProcBindT; --using Read = tomp::clause::ReadT; --using Reduction = tomp::clause::ReductionT; --using Relaxed = tomp::clause::RelaxedT; --using Release = tomp::clause::ReleaseT; --using ReverseOffload = tomp::clause::ReverseOffloadT; --using Safelen = tomp::clause::SafelenT; --using Schedule = tomp::clause::ScheduleT; --using SeqCst = tomp::clause::SeqCstT; --using Severity = tomp::clause::SeverityT; --using Shared = tomp::clause::SharedT; --using Simdlen = tomp::clause::SimdlenT; --using Simd = tomp::clause::SimdT; --using Sizes = tomp::clause::SizesT; --using TaskReduction = tomp::clause::TaskReductionT; --using ThreadLimit = tomp::clause::ThreadLimitT; --using Threads = tomp::clause::ThreadsT; --using To = tomp::clause::ToT; --using UnifiedAddress = tomp::clause::UnifiedAddressT; --using UnifiedSharedMemory = -- tomp::clause::UnifiedSharedMemoryT; --using Uniform = tomp::clause::UniformT; --using Unknown = tomp::clause::UnknownT; --using Untied = tomp::clause::UntiedT; --using Update = tomp::clause::UpdateT; --using UseDeviceAddr = tomp::clause::UseDeviceAddrT; --using UseDevicePtr = tomp::clause::UseDevicePtrT; --using UsesAllocators = tomp::clause::UsesAllocatorsT; --using Use = tomp::clause::UseT; --using Weak = tomp::clause::WeakT; --using When = tomp::clause::WhenT; --using Write = tomp::clause::WriteT; --} // namespace clause -- --using tomp::type::operator==; -- --struct CancellationConstructType { -- using EmptyTrait = std::true_type; --}; --struct Depobj { -- using EmptyTrait = std::true_type; --}; --struct Flush { -- using EmptyTrait = std::true_type; --}; --struct MemoryOrder { -- using EmptyTrait = std::true_type; --}; --struct Threadprivate { -- using EmptyTrait = std::true_type; --}; -- --using ClauseBase = tomp::ClauseT; -- --struct Clause : public ClauseBase { -- Clause(ClauseBase &&base, const parser::CharBlock source = {}) -- : ClauseBase(std::move(base)), source(source) {} -- // "source" will be ignored by tomp::type::operator==. 
-- parser::CharBlock source; --}; -- --template --Clause makeClause(llvm::omp::Clause id, Specific &&specific, -- parser::CharBlock source = {}) { -- return Clause(typename Clause::BaseT{id, specific}, source); --} -- --Clause makeClause(const parser::OmpClause &cls, -- semantics::SemanticsContext &semaCtx); -- --List makeClauses(const parser::OmpClauseList &clauses, -- semantics::SemanticsContext &semaCtx); -- --bool transferLocations(const List &from, List &to); --} // namespace Fortran::lower::omp -- --#endif // FORTRAN_LOWER_OPENMP_CLAUSES_H -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp llvm-project/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp ---- llvm-project.orig/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp 2024-06-12 10:43:12.620210495 -0500 -+++ llvm-project/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp 2024-06-12 10:44:09.347614281 -0500 -@@ -12,7 +12,7 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp llvm-project-aso/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp +--- llvm-project-aso-orig/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp 2024-10-18 17:40:32.496992373 -0500 ++++ llvm-project-aso/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp 2024-11-23 20:39:47.180175366 -0600 +@@ -12,8 +12,8 @@ #include "DataSharingProcessor.h" -#include "Utils.h" + #include "flang/Lower/ConvertVariable.h" +#include "flang/Lower/OpenMP/Utils.h" #include "flang/Lower/PFTBuilder.h" #include "flang/Lower/SymbolMap.h" #include "flang/Optimizer/Builder/HLFIRTools.h" -@@ -47,20 +47,24 @@ +@@ -49,19 +49,24 @@ }); } @@ -4597,88 +5007,43 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/DataShari +} - privatize(clauseOps); -+void DataSharingProcessor::processStep2() { ++void DataSharingProcessor::processStep2( ++ mlir::omp::PrivateClauseOps *clauseOps) { + if (privatizationDone) + return; -+ privatize(); ++ privatize(clauseOps); insertBarrier(); + privatizationDone = true; } -void DataSharingProcessor::processStep2(mlir::Operation *op, bool isLoop) { -- // 'sections' lastprivate is handled by genOMP() +void DataSharingProcessor::processStep3(mlir::Operation *op, bool isLoop) { -+ // 'sections' lastprivate is handled by genOMP() + // 'sections' lastprivate is handled by genOMP() if (!mlir::isa(op)) { - insPt = firOpBuilder.saveInsertionPoint(); - copyLastPrivatize(op); -@@ -68,15 +72,12 @@ - } - - if (isLoop) { -- // push deallocs out of the loop -+ // push deallocs out of the loop - firOpBuilder.setInsertionPointAfter(op); - insertDeallocs(); - } else { -- // insert dummy instruction to mark the insertion position -- mlir::Value undefMarker = firOpBuilder.create( -- op->getLoc(), firOpBuilder.getIndexType()); -+ mlir::OpBuilder::InsertionGuard guard(firOpBuilder); - insertDeallocs(); -- firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp()); - } - } - -@@ -414,14 +415,15 @@ - preDeterminedSymbols); - } - --void DataSharingProcessor::privatize(mlir::omp::PrivateClauseOps *clauseOps) { -+void DataSharingProcessor::privatize() { - for (const semantics::Symbol *sym : allPrivatizedSymbols) { - if (const auto *commonDet = + mlir::OpBuilder::InsertionGuard guard(firOpBuilder); +@@ -424,8 +429,9 @@ sym->detailsIf()) { for (const auto &mem : commonDet->objects()) -- doPrivatize(&*mem, clauseOps); + doPrivatize(&*mem, clauseOps); - } else -- doPrivatize(sym, clauseOps); -+ doPrivatize(&*mem); + } else { -+ doPrivatize(sym); + doPrivatize(sym, clauseOps); + } } } -@@ 
-438,8 +440,7 @@ - } - } - --void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, -- mlir::omp::PrivateClauseOps *clauseOps) { -+void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym) { - if (!useDelayedPrivatization) { - cloneSymbol(sym); - copyFirstPrivateSymbol(sym); -@@ -539,10 +540,10 @@ - return result; - }(); - -- if (clauseOps) { -- clauseOps->privatizers.push_back(mlir::SymbolRefAttr::get(privatizerOp)); -- clauseOps->privateVars.push_back(hsb.getAddr()); -- } -+ privateClauseOps.privatizers.push_back( -+ mlir::SymbolRefAttr::get(privatizerOp)); -+ privateClauseOps.privateVars.push_back(hsb.getAddr()); -+ delayedPrivSyms.push_back(sym); - +@@ -553,7 +559,6 @@ + clauseOps->privateSyms.push_back(mlir::SymbolRefAttr::get(privatizerOp)); + clauseOps->privateVars.push_back(hsb.getAddr()); + } +- symToPrivatizer[sym] = privatizerOp; } -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/DataSharingProcessor.h llvm-project/flang/lib/Lower/OpenMP/DataSharingProcessor.h ---- llvm-project.orig/flang/lib/Lower/OpenMP/DataSharingProcessor.h 2024-06-12 10:43:12.620210495 -0500 -+++ llvm-project/flang/lib/Lower/OpenMP/DataSharingProcessor.h 2024-06-12 10:44:09.347614281 -0500 + +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Lower/OpenMP/DataSharingProcessor.h llvm-project-aso/flang/lib/Lower/OpenMP/DataSharingProcessor.h +--- llvm-project-aso-orig/flang/lib/Lower/OpenMP/DataSharingProcessor.h 2024-09-13 09:46:38.858303722 -0500 ++++ llvm-project-aso/flang/lib/Lower/OpenMP/DataSharingProcessor.h 2024-11-23 20:39:47.180175366 -0600 @@ -12,9 +12,9 @@ #ifndef FORTRAN_LOWER_DATASHARINGPROCESSOR_H #define FORTRAN_LOWER_DATASHARINGPROCESSOR_H @@ -4690,39 +5055,17 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/DataShari #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Parser/parse-tree.h" #include "flang/Semantics/symbol.h" -@@ -89,7 +89,11 @@ +@@ -87,7 +87,9 @@ bool shouldCollectPreDeterminedSymbols; bool useDelayedPrivatization; lower::SymMap *symTable; + OMPConstructSymbolVisitor visitor; -+ mlir::omp::PrivateClauseOps privateClauseOps; -+ llvm::SmallVector delayedPrivSyms; + bool privatizationDone = false; bool needBarrier(); void collectSymbols(semantics::Symbol::Flag flag, -@@ -105,15 +109,10 @@ - void collectDefaultSymbols(); - void collectImplicitSymbols(); - void collectPreDeterminedSymbols(); -- void privatize(mlir::omp::PrivateClauseOps *clauseOps); -- void defaultPrivatize( -- mlir::omp::PrivateClauseOps *clauseOps, -- llvm::SmallVectorImpl *privateSyms); -- void implicitPrivatize( -- mlir::omp::PrivateClauseOps *clauseOps, -- llvm::SmallVectorImpl *privateSyms); -- void doPrivatize(const semantics::Symbol *sym, -- mlir::omp::PrivateClauseOps *clauseOps); -+ void privatize(); -+ void defaultPrivatize(); -+ void implicitPrivatize(); -+ void doPrivatize(const semantics::Symbol *sym); - void copyLastPrivatize(mlir::Operation *op); - void insertLastPrivateCompare(mlir::Operation *op); - void cloneSymbol(const semantics::Symbol *sym); -@@ -133,19 +132,33 @@ +@@ -125,19 +127,33 @@ bool useDelayedPrivatization = false, lower::SymMap *symTable = nullptr); @@ -4762,29 +5105,14 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/DataShari + // before the operation is created since the bounds of the MLIR OpenMP + // operation can be privatised. 
+ void processStep1(); -+ void processStep2(); ++ void processStep2(mlir::omp::PrivateClauseOps *clauseOps = nullptr); + void processStep3(mlir::Operation *op, bool isLoop); - void setLoopIV(mlir::Value iv) { - assert(!loopIV && "Loop iteration variable already set"); -@@ -156,6 +169,14 @@ - getAllSymbolsToPrivatize() const { - return allPrivatizedSymbols; - } -+ -+ const mlir::omp::PrivateClauseOps &getPrivateClauseOps() const { -+ return privateClauseOps; -+ } -+ -+ llvm::ArrayRef getDelayedPrivSyms() const { -+ return delayedPrivSyms; -+ } - }; + void pushLoopIV(mlir::Value iv) { loopIVs.push_back(iv); } - } // namespace omp -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Decomposer.cpp llvm-project/flang/lib/Lower/OpenMP/Decomposer.cpp ---- llvm-project.orig/flang/lib/Lower/OpenMP/Decomposer.cpp 2024-06-12 10:43:12.620210495 -0500 -+++ llvm-project/flang/lib/Lower/OpenMP/Decomposer.cpp 2024-06-12 10:44:09.347614281 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Lower/OpenMP/Decomposer.cpp llvm-project-aso/flang/lib/Lower/OpenMP/Decomposer.cpp +--- llvm-project-aso-orig/flang/lib/Lower/OpenMP/Decomposer.cpp 2024-08-27 20:36:25.208173319 -0500 ++++ llvm-project-aso/flang/lib/Lower/OpenMP/Decomposer.cpp 2024-11-23 20:39:47.180175366 -0600 @@ -12,8 +12,8 @@ #include "Decomposer.h" @@ -4796,9 +5124,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Decompose #include "flang/Lower/PFTBuilder.h" #include "flang/Semantics/semantics.h" #include "flang/Tools/CrossToolHelpers.h" -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Decomposer.h llvm-project/flang/lib/Lower/OpenMP/Decomposer.h ---- llvm-project.orig/flang/lib/Lower/OpenMP/Decomposer.h 2024-06-12 10:43:12.620210495 -0500 -+++ llvm-project/flang/lib/Lower/OpenMP/Decomposer.h 2024-06-12 10:44:09.347614281 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Lower/OpenMP/Decomposer.h llvm-project-aso/flang/lib/Lower/OpenMP/Decomposer.h +--- llvm-project-aso-orig/flang/lib/Lower/OpenMP/Decomposer.h 2024-08-27 20:36:25.208173319 -0500 ++++ llvm-project-aso/flang/lib/Lower/OpenMP/Decomposer.h 2024-11-23 20:39:47.180175366 -0600 @@ -8,7 +8,7 @@ #ifndef FORTRAN_LOWER_OPENMP_DECOMPOSER_H #define FORTRAN_LOWER_OPENMP_DECOMPOSER_H @@ -4806,11 +5134,11 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Decompose -#include "Clauses.h" +#include "flang/Lower/OpenMP/Clauses.h" #include "mlir/IR/BuiltinOps.h" - #include "llvm/Frontend/OpenMP/ConstructCompositionT.h" #include "llvm/Frontend/OpenMP/ConstructDecompositionT.h" -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/OpenMP.cpp llvm-project/flang/lib/Lower/OpenMP/OpenMP.cpp ---- llvm-project.orig/flang/lib/Lower/OpenMP/OpenMP.cpp 2024-06-12 10:43:12.620210495 -0500 -+++ llvm-project/flang/lib/Lower/OpenMP/OpenMP.cpp 2024-06-12 10:44:09.351614239 -0500 + #include "llvm/Frontend/OpenMP/OMP.h" +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Lower/OpenMP/OpenMP.cpp llvm-project-aso/flang/lib/Lower/OpenMP/OpenMP.cpp +--- llvm-project-aso-orig/flang/lib/Lower/OpenMP/OpenMP.cpp 2024-11-23 20:25:26.839275178 -0600 ++++ llvm-project-aso/flang/lib/Lower/OpenMP/OpenMP.cpp 2024-11-23 20:39:47.180175366 -0600 @@ -13,16 +13,16 @@ #include "flang/Lower/OpenMP.h" @@ -4830,320 +5158,529 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/OpenMP.cp #include "flang/Lower/StatementContext.h" #include "flang/Lower/SymbolMap.h" 
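
// --- Illustrative sketch (not part of the patch): the DataSharingProcessor
// hunks above split privatization into three steps: processStep1() collects
// the symbols, processStep2() emits the privatizers (optionally filling the
// clause operands, and guarded by privatizationDone so it runs at most once),
// and processStep3() runs once the MLIR operation exists. The mock below is a
// self-contained analogue of that call sequence; it is not the flang API and
// all names are illustrative.
#include <iostream>
#include <string>
#include <vector>

struct PrivateClauseOps {              // stand-in for mlir::omp::PrivateClauseOps
  std::vector<std::string> privateVars;
};

class MockDataSharingProcessor {
  std::vector<std::string> symbols;
  bool privatizationDone = false;

public:
  // Step 1: gather the symbols that need privatization; no IR is touched yet.
  void processStep1() { symbols = {"a", "b"}; }

  // Step 2: may run before the operation is created, so the clause operands
  // being assembled can receive the privatized variables; the flag makes
  // repeated calls harmless.
  void processStep2(PrivateClauseOps *clauseOps = nullptr) {
    if (privatizationDone)
      return;
    if (clauseOps)
      clauseOps->privateVars = symbols;
    privatizationDone = true;
  }

  // Step 3: runs after the operation exists (e.g. lastprivate copies and
  // deallocations are placed relative to it).
  void processStep3(const std::string &opName, bool isLoop) {
    std::cout << "finalize " << opName << (isLoop ? " (loop)" : "") << '\n';
  }
};

int main() {
  MockDataSharingProcessor dsp;
  PrivateClauseOps clauseOps;
  dsp.processStep1();           // before the operands are assembled
  dsp.processStep2(&clauseOps); // while building the operation's operands
  // ... the omp operation would be created from clauseOps here ...
  dsp.processStep3("omp.wsloop", /*isLoop=*/true); // after the op exists
}
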
#include "flang/Optimizer/Builder/BoxValue.h" -@@ -45,12 +45,25 @@ +@@ -46,6 +46,19 @@ // Code generation helper functions //===----------------------------------------------------------------------===// -+static mlir::omp::TargetOp findParentTargetOp(mlir::OpBuilder &builder) { -+ mlir::Operation *parentOp = builder.getBlock()->getParentOp(); -+ if (!parentOp) -+ return nullptr; ++static void genOMPDispatch(lower::AbstractConverter &converter, ++ lower::SymMap &symTable, ++ semantics::SemanticsContext &semaCtx, ++ lower::pft::Evaluation &eval, mlir::Location loc, ++ const ConstructQueue &queue, ++ ConstructQueue::const_iterator item); ++ ++static void processHostEvalClauses(lower::AbstractConverter &converter, ++ semantics::SemanticsContext &semaCtx, ++ lower::StatementContext &stmtCtx, ++ lower::pft::Evaluation &eval, ++ mlir::Location loc); ++ + namespace { + /// Structure holding the information needed to create and bind entry block + /// arguments associated to a single clause. +@@ -64,6 +77,7 @@ + /// Structure holding the information needed to create and bind entry block + /// arguments associated to all clauses that can define them. + struct EntryBlockArgs { ++ llvm::ArrayRef hostEvalVars; + EntryBlockArgsEntry inReduction; + EntryBlockArgsEntry map; + EntryBlockArgsEntry priv; +@@ -86,18 +100,146 @@ + + auto getVars() const { + return llvm::concat( +- inReduction.vars, map.vars, priv.vars, reduction.vars, ++ hostEvalVars, inReduction.vars, map.vars, priv.vars, reduction.vars, + taskReduction.vars, useDeviceAddr.vars, useDevicePtr.vars); + } + }; + -+ auto targetOp = llvm::dyn_cast(parentOp); -+ if (!targetOp) -+ targetOp = parentOp->getParentOfType(); ++/// Structure holding information that is needed to pass host-evaluated ++/// information to later lowering stages. ++class HostEvalInfo { ++public: ++ // Allow this function access to private members in order to initialize them. ++ friend void ::processHostEvalClauses(lower::AbstractConverter &, ++ semantics::SemanticsContext &, ++ lower::StatementContext &, ++ lower::pft::Evaluation &, ++ mlir::Location); ++ ++ /// Fill \c vars with values stored in \c ops. ++ /// ++ /// The order in which values are stored matches the one expected by \see ++ /// bindOperands(). ++ void collectValues(llvm::SmallVectorImpl &vars) const { ++ vars.append(ops.loopLowerBounds); ++ vars.append(ops.loopUpperBounds); ++ vars.append(ops.loopSteps); + -+ return targetOp; -+} ++ if (ops.numTeamsLower) ++ vars.push_back(ops.numTeamsLower); + - static void genOMPDispatch(lower::AbstractConverter &converter, - lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, - lower::pft::Evaluation &eval, mlir::Location loc, - const ConstructQueue &queue, -- ConstructQueue::iterator item); -+ ConstructQueue::iterator item, -+ DataSharingProcessor *dsp = nullptr); - - static lower::pft::Evaluation * - getCollapsedLoopEval(lower::pft::Evaluation &eval, int collapseValue) { -@@ -79,6 +92,75 @@ - converter.genEval(e); - } - -+//===----------------------------------------------------------------------===// -+// HostClausesInsertionGuard -+//===----------------------------------------------------------------------===// ++ if (ops.numTeamsUpper) ++ vars.push_back(ops.numTeamsUpper); + -+/// If the insertion point of the builder is located inside of an omp.target -+/// region, this RAII guard moves the insertion point to just before that -+/// omp.target operation and then restores the original insertion point when -+/// destroyed. 
If not currently inserting inside an omp.target, it remains -+/// unchanged. -+class HostClausesInsertionGuard { -+public: -+ HostClausesInsertionGuard(mlir::OpBuilder &builder) : builder(builder) { -+ targetOp = findParentTargetOp(builder); -+ if (targetOp) { -+ ip = builder.saveInsertionPoint(); -+ builder.setInsertionPoint(targetOp); -+ } ++ if (ops.numThreads) ++ vars.push_back(ops.numThreads); ++ ++ if (ops.threadLimit) ++ vars.push_back(ops.threadLimit); + } + -+ ~HostClausesInsertionGuard() { -+ if (ip.isSet()) { -+ fixupExtractedHostOps(); -+ builder.restoreInsertionPoint(ip); -+ } ++ /// Update \c ops, replacing all values with the corresponding block argument ++ /// in \c args. ++ /// ++ /// The order in which values are stored in \c args is the same as the one ++ /// used by \see collectValues(). ++ void bindOperands(llvm::ArrayRef args) { ++ assert(args.size() == ++ ops.loopLowerBounds.size() + ops.loopUpperBounds.size() + ++ ops.loopSteps.size() + (ops.numTeamsLower ? 1 : 0) + ++ (ops.numTeamsUpper ? 1 : 0) + (ops.numThreads ? 1 : 0) + ++ (ops.threadLimit ? 1 : 0) && ++ "invalid block argument list"); ++ int argIndex = 0; ++ for (size_t i = 0; i < ops.loopLowerBounds.size(); ++i) ++ ops.loopLowerBounds[i] = args[argIndex++]; ++ ++ for (size_t i = 0; i < ops.loopUpperBounds.size(); ++i) ++ ops.loopUpperBounds[i] = args[argIndex++]; ++ ++ for (size_t i = 0; i < ops.loopSteps.size(); ++i) ++ ops.loopSteps[i] = args[argIndex++]; ++ ++ if (ops.numTeamsLower) ++ ops.numTeamsLower = args[argIndex++]; ++ ++ if (ops.numTeamsUpper) ++ ops.numTeamsUpper = args[argIndex++]; ++ ++ if (ops.numThreads) ++ ops.numThreads = args[argIndex++]; ++ ++ if (ops.threadLimit) ++ ops.threadLimit = args[argIndex++]; + } + -+private: -+ mlir::OpBuilder &builder; -+ mlir::OpBuilder::InsertPoint ip; -+ mlir::omp::TargetOp targetOp; -+ -+ /// Fixup any uses of target region block arguments that we have just created -+ /// outside of the target region, and replace them by their host values. -+ void fixupExtractedHostOps() { -+ auto useOutsideTargetRegion = [](mlir::OpOperand &operand) { -+ if (mlir::Operation *owner = operand.getOwner()) -+ return !owner->getParentOfType(); ++ /// Update \p clauseOps and \p ivOut with the corresponding host-evaluated ++ /// values and Fortran symbols, respectively, if they have already been ++ /// initialized but not yet applied. ++ /// ++ /// \returns whether an update was performed. If not, these clauses were not ++ /// evaluated in the host device. ++ bool apply(mlir::omp::LoopNestOperands &clauseOps, ++ llvm::SmallVectorImpl &ivOut) { ++ if (iv.empty() || loopNestApplied) { ++ loopNestApplied = true; + return false; -+ }; ++ } + -+ mlir::OperandRange map = targetOp.getMapOperands(); -+ for (mlir::BlockArgument arg : targetOp.getRegion().getArguments()) { -+ mlir::Value hostVal = map[arg.getArgNumber()] -+ .getDefiningOp() -+ .getVarPtr(); -+ -+ // Replace instances of omp.target block arguments used outside with their -+ // corresponding host value. -+ arg.replaceUsesWithIf(hostVal, [&](mlir::OpOperand &operand) -> bool { -+ // If the use is an hlfir.declare, we need to search for the matching -+ // one within host code. 
-+ if (auto declareOp = llvm::dyn_cast_if_present( -+ operand.getOwner())) { -+ if (auto hostDeclareOp = hostVal.getDefiningOp()) { -+ declareOp->replaceUsesWithIf(hostDeclareOp.getResults(), -+ useOutsideTargetRegion); -+ } else if (auto hostBoxOp = hostVal.getDefiningOp()) { -+ declareOp->replaceUsesWithIf(hostBoxOp.getVal() -+ .getDefiningOp() -+ .getResults(), -+ useOutsideTargetRegion); -+ } -+ } -+ return useOutsideTargetRegion(operand); -+ }); ++ loopNestApplied = true; ++ clauseOps.loopLowerBounds = ops.loopLowerBounds; ++ clauseOps.loopUpperBounds = ops.loopUpperBounds; ++ clauseOps.loopSteps = ops.loopSteps; ++ ivOut.append(iv); ++ return true; ++ } ++ ++ /// Update \p clauseOps with the corresponding host-evaluated values if they ++ /// have already been initialized but not yet applied. ++ /// ++ /// \returns whether an update was performed. If not, these clauses were not ++ /// evaluated in the host device. ++ bool apply(mlir::omp::ParallelOperands &clauseOps) { ++ if (!ops.numThreads || parallelApplied) { ++ parallelApplied = true; ++ return false; + } ++ ++ parallelApplied = true; ++ clauseOps.numThreads = ops.numThreads; ++ return true; + } -+}; + - static fir::GlobalOp globalInitialization(lower::AbstractConverter &converter, - fir::FirOpBuilder &firOpBuilder, - const semantics::Symbol &sym, -@@ -226,6 +308,27 @@ - return storeOp; - } - -+static bool evalHasSiblings(lower::pft::Evaluation &eval) { -+ return eval.parent.visit(common::visitors{ -+ [&](const lower::pft::Program &parent) { -+ return parent.getUnits().size() + parent.getCommonBlocks().size() > 1; -+ }, -+ [&](const lower::pft::Evaluation &parent) { -+ for (auto &sibling : *parent.evaluationList) -+ if (&sibling != &eval && !sibling.isEndStmt()) -+ return true; -+ -+ return false; -+ }, -+ [&](const auto &parent) { -+ for (auto &sibling : parent.evaluationList) -+ if (&sibling != &eval && !sibling.isEndStmt()) -+ return true; -+ -+ return false; -+ }}); -+} ++ /// Update \p clauseOps with the corresponding host-evaluated values if they ++ /// have already been initialized. ++ /// ++ /// \returns whether an update was performed. If not, these clauses were not ++ /// evaluated in the host device. ++ bool apply(mlir::omp::TeamsOperands &clauseOps) { ++ if (!ops.numTeamsLower && !ops.numTeamsUpper && !ops.threadLimit) ++ return false; + - // This helper function implements the functionality of "promoting" - // non-CPTR arguments of use_device_ptr to use_device_addr - // arguments (automagic conversion of use_device_ptr -> -@@ -414,24 +517,6 @@ - } - - static void --genReductionVars(mlir::Operation *op, lower::AbstractConverter &converter, -- mlir::Location &loc, -- llvm::ArrayRef reductionArgs, -- llvm::ArrayRef reductionTypes) { -- fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); -- llvm::SmallVector blockArgLocs(reductionArgs.size(), loc); -- -- mlir::Block *entryBlock = firOpBuilder.createBlock( -- &op->getRegion(0), {}, reductionTypes, blockArgLocs); -- -- // Bind the reduction arguments to their block arguments. 
-- for (auto [arg, prv] : -- llvm::zip_equal(reductionArgs, entryBlock->getArguments())) { -- converter.bindSymbol(*arg, prv); -- } --} -- --static void - markDeclareTarget(mlir::Operation *op, lower::AbstractConverter &converter, - mlir::omp::DeclareTargetCaptureClause captureClause, - mlir::omp::DeclareTargetDeviceType deviceType) { -@@ -502,11 +587,6 @@ - : converter(converter), symTable(symTable), semaCtx(semaCtx), loc(loc), - eval(eval), dir(dir) {} - -- OpWithBodyGenInfo &setOuterCombined(bool value) { -- outerCombined = value; -- return *this; -- } -- - OpWithBodyGenInfo &setClauses(const List *value) { - clauses = value; - return *this; -@@ -518,8 +598,8 @@ - } - - OpWithBodyGenInfo & -- setReductions(llvm::SmallVectorImpl *value1, -- llvm::SmallVectorImpl *value2) { -+ setReductions(llvm::ArrayRef *value1, -+ llvm::ArrayRef *value2) { - reductionSymbols = value1; - reductionTypes = value2; - return *this; -@@ -542,16 +622,14 @@ - lower::pft::Evaluation &eval; - /// [in] leaf directive for which to generate the op body. - llvm::omp::Directive dir; -- /// [in] is this an outer operation - prevents privatization. -- bool outerCombined = false; - /// [in] list of clauses to process. - const List *clauses = nullptr; - /// [in] if provided, processes the construct's data-sharing attributes. - DataSharingProcessor *dsp = nullptr; - /// [in] if provided, list of reduction symbols -- llvm::SmallVectorImpl *reductionSymbols = nullptr; -+ llvm::ArrayRef *reductionSymbols = nullptr; - /// [in] if provided, list of reduction types -- llvm::SmallVectorImpl *reductionTypes = nullptr; -+ llvm::ArrayRef *reductionTypes = nullptr; - /// [in] if provided, emits the op's region entry. Otherwise, an emtpy block - /// is created in the region. - GenOMPRegionEntryCBFn genRegionEntryCB = nullptr; -@@ -568,12 +646,6 @@ - ConstructQueue::iterator item) { - fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder(); - -- auto insertMarker = [](fir::FirOpBuilder &builder) { -- mlir::Value undef = builder.create(builder.getUnknownLoc(), -- builder.getIndexType()); -- return undef.getDefiningOp(); -- }; -- - // If an argument for the region is provided then create the block with that - // argument. Also update the symbol's address with the mlir argument value. - // e.g. For loops the argument is the induction variable. And all further -@@ -586,12 +658,13 @@ - firOpBuilder.createBlock(&op.getRegion(0)); - return {}; - }(); ++ clauseOps.numTeamsLower = ops.numTeamsLower; ++ clauseOps.numTeamsUpper = ops.numTeamsUpper; ++ clauseOps.threadLimit = ops.threadLimit; ++ return true; ++ } + - // Mark the earliest insertion point. -- mlir::Operation *marker = insertMarker(firOpBuilder); -+ auto marker = firOpBuilder.saveInsertionPoint(); - - // If it is an unstructured region and is not the outer region of a combined - // construct, create empty blocks for all evaluations. -- if (info.eval.lowerAsUnstructured() && !info.outerCombined) -+ if (info.eval.lowerAsUnstructured()) - lower::createEmptyRegionBlocks( - firOpBuilder, info.eval.getNestedEvaluations()); - -@@ -599,22 +672,23 @@ - // code will use the right symbols. 
- bool isLoop = llvm::omp::getDirectiveAssociation(info.dir) == - llvm::omp::Association::Loop; -- bool privatize = info.clauses && !info.outerCombined; -+ bool privatize = info.clauses; - -- firOpBuilder.setInsertionPoint(marker); -+ firOpBuilder.restoreInsertionPoint(marker); - std::optional tempDsp; - if (privatize) { - if (!info.dsp) { - tempDsp.emplace(info.converter, info.semaCtx, *info.clauses, info.eval, - Fortran::lower::omp::isLastItemInQueue(item, queue)); - tempDsp->processStep1(); -+ tempDsp->processStep2(); - } - } - - if (info.dir == llvm::omp::Directive::OMPD_parallel) { - threadPrivatizeVars(info.converter, info.eval); - if (info.clauses) { -- firOpBuilder.setInsertionPoint(marker); -+ firOpBuilder.restoreInsertionPoint(marker); - ClauseProcessor(info.converter, info.semaCtx, *info.clauses) - .processCopyin(); - } -@@ -622,7 +696,7 @@ ++private: ++ mlir::omp::HostEvaluatedOperands ops; ++ llvm::SmallVector iv; ++ bool loopNestApplied = false, parallelApplied = false; ++}; + } // namespace - if (ConstructQueue::iterator next = std::next(item); next != queue.end()) { - genOMPDispatch(info.converter, info.symTable, info.semaCtx, info.eval, -- info.loc, queue, next); -+ info.loc, queue, next, info.dsp); - } else { - // genFIR(Evaluation&) tries to patch up unterminated blocks, causing - // a lot of complications for our approach if the terminator generation -@@ -630,7 +704,7 @@ - // delete it. - firOpBuilder.setInsertionPointToEnd(&op.getRegion(0).back()); - auto *temp = lower::genOpenMPTerminator(firOpBuilder, &op, info.loc); -- firOpBuilder.setInsertionPointAfter(marker); -+ firOpBuilder.restoreInsertionPoint(marker); - genNestedEvaluations(info.converter, info.eval); - temp->erase(); - } -@@ -695,17 +769,17 @@ +-static void genOMPDispatch(lower::AbstractConverter &converter, +- lower::SymMap &symTable, +- semantics::SemanticsContext &semaCtx, +- lower::pft::Evaluation &eval, mlir::Location loc, +- const ConstructQueue &queue, +- ConstructQueue::const_iterator item); ++/// Stack of \see HostEvalInfo to represent the current nest of \c omp.target ++/// operations being created. ++/// ++/// The current implementation prevents nested 'target' regions from breaking ++/// the handling of the outer region by keeping a stack of information ++/// structures, but it will probably still require some further work to support ++/// reverse offloading. ++static llvm::SmallVector hostEvalInfo; - if (!info.dsp) { - assert(tempDsp.has_value()); -- tempDsp->processStep2(privatizationTopLevelOp, isLoop); -+ tempDsp->processStep3(privatizationTopLevelOp, isLoop); - } else { - if (isLoop && regionArgs.size() > 0) - info.dsp->setLoopIV(info.converter.getSymbolAddress(*regionArgs[0])); -- info.dsp->processStep2(privatizationTopLevelOp, isLoop); -+ info.dsp->processStep3(privatizationTopLevelOp, isLoop); - } - } - } + /// Bind symbols to their corresponding entry block arguments. + /// +@@ -220,6 +362,8 @@ + }; -- firOpBuilder.setInsertionPointAfter(marker); -- marker->erase(); -+ firOpBuilder.setInsertionPoint(marker.getBlock(), -+ std::prev(marker.getPoint())); + // Process in clause name alphabetical order to match block arguments order. ++ // Do not bind host_eval variables because they cannot be used inside of the ++ // corresponding region, except for very specific cases handled separately. 
+ bindPrivateLike(args.inReduction.syms, args.inReduction.vars, + op.getInReductionBlockArgs()); + bindMapLike(args.map.syms, op.getMapBlockArgs()); +@@ -257,6 +401,246 @@ + }); } - static void genBodyOfTargetDataOp( -@@ -808,6 +882,9 @@ - auto *regionBlock = firOpBuilder.createBlock(®ion, {}, allRegionArgTypes, - allRegionArgLocs); - -+ if (!enableDelayedPrivatizationStaging) -+ dsp.processStep2(); ++/// Get the directive enumeration value corresponding to the given OpenMP ++/// construct PFT node. ++llvm::omp::Directive ++extractOmpDirective(const parser::OpenMPConstruct &ompConstruct) { ++ return common::visit( ++ common::visitors{ ++ [](const parser::OpenMPAllocatorsConstruct &c) { ++ return llvm::omp::OMPD_allocators; ++ }, ++ [](const parser::OpenMPAtomicConstruct &c) { ++ return llvm::omp::OMPD_atomic; ++ }, ++ [](const parser::OpenMPBlockConstruct &c) { ++ return std::get( ++ std::get(c.t).t) ++ .v; ++ }, ++ [](const parser::OpenMPCriticalConstruct &c) { ++ return llvm::omp::OMPD_critical; ++ }, ++ [](const parser::OpenMPDeclarativeAllocate &c) { ++ return llvm::omp::OMPD_allocate; ++ }, ++ [](const parser::OpenMPExecutableAllocate &c) { ++ return llvm::omp::OMPD_allocate; ++ }, ++ [](const parser::OpenMPLoopConstruct &c) { ++ return std::get( ++ std::get(c.t).t) ++ .v; ++ }, ++ [](const parser::OpenMPSectionConstruct &c) { ++ return llvm::omp::OMPD_section; ++ }, ++ [](const parser::OpenMPSectionsConstruct &c) { ++ return std::get( ++ std::get(c.t).t) ++ .v; ++ }, ++ [](const parser::OpenMPStandaloneConstruct &c) { ++ return common::visit( ++ common::visitors{ ++ [](const parser::OpenMPSimpleStandaloneConstruct &c) { ++ return std::get(c.t) ++ .v; ++ }, ++ [](const parser::OpenMPFlushConstruct &c) { ++ return llvm::omp::OMPD_flush; ++ }, ++ [](const parser::OpenMPCancelConstruct &c) { ++ return llvm::omp::OMPD_cancel; ++ }, ++ [](const parser::OpenMPCancellationPointConstruct &c) { ++ return llvm::omp::OMPD_cancellation_point; ++ }, ++ [](const parser::OpenMPDepobjConstruct &c) { ++ return llvm::omp::OMPD_depobj; ++ }}, ++ c.u); ++ }}, ++ ompConstruct.u); ++} + - // Clones the `bounds` placing them inside the target region and returns them. - auto cloneBound = [&](mlir::Value bound) { - if (mlir::isMemoryEffectFree(bound.getDefiningOp())) { -@@ -871,7 +948,7 @@ - } ++/// Populate the global \see hostEvalInfo after processing clauses for the given ++/// \p eval OpenMP target construct, or nested constructs, if these must be ++/// evaluated outside of the target region per the spec. ++/// ++/// In particular, this will ensure that in 'target teams' and equivalent nested ++/// constructs, the \c thread_limit and \c num_teams clauses will be evaluated ++/// in the host. Additionally, loop bounds, steps and the \c num_threads clause ++/// will also be evaluated in the host if a target SPMD construct is detected ++/// (i.e. 'target teams distribute parallel do [simd]' or equivalent nesting). ++/// ++/// The result, stored as a global, is intended to be used to populate the \c ++/// host_eval operands of the associated \c omp.target operation, and also to be ++/// checked and used by later lowering steps to populate the corresponding ++/// operands of the \c omp.teams, \c omp.parallel or \c omp.loop_nest ++/// operations. 
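
// --- Illustrative sketch (not part of the patch): extractOmpDirective above
// maps every alternative held by the OpenMP construct parse-tree node to its
// llvm::omp::Directive value by visiting the variant. The standalone analogue
// below shows the same pattern with std::variant/std::visit; the small structs
// and the enum are assumptions for illustration, not the flang parse-tree
// types.
#include <iostream>
#include <type_traits>
#include <variant>

enum class Directive { Atomic, Critical, Loop, Sections };

struct AtomicConstruct {};
struct CriticalConstruct {};
struct LoopConstruct { Directive begin; };     // carries its begin directive
struct SectionsConstruct { Directive begin; };

using OpenMPConstruct =
    std::variant<AtomicConstruct, CriticalConstruct, LoopConstruct,
                 SectionsConstruct>;

Directive extractDirective(const OpenMPConstruct &construct) {
  return std::visit(
      [](const auto &c) -> Directive {
        using T = std::decay_t<decltype(c)>;
        if constexpr (std::is_same_v<T, AtomicConstruct>)
          return Directive::Atomic;
        else if constexpr (std::is_same_v<T, CriticalConstruct>)
          return Directive::Critical;
        else
          return c.begin; // loop/sections constructs store their directive
      },
      construct);
}

int main() {
  OpenMPConstruct c = LoopConstruct{Directive::Loop};
  std::cout << static_cast<int>(extractDirective(c)) << '\n'; // prints: 2
}
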
++static void processHostEvalClauses(lower::AbstractConverter &converter, ++ semantics::SemanticsContext &semaCtx, ++ lower::StatementContext &stmtCtx, ++ lower::pft::Evaluation &eval, ++ mlir::Location loc) { ++ // Obtain the list of clauses of the given OpenMP block or loop construct ++ // evaluation. Other evaluations passed to this lambda keep `clauses` ++ // unchanged. ++ auto extractClauses = [&semaCtx](lower::pft::Evaluation &eval, ++ List &clauses) { ++ const auto *ompEval = eval.getIf(); ++ if (!ompEval) ++ return; ++ ++ const parser::OmpClauseList *beginClauseList = nullptr; ++ const parser::OmpClauseList *endClauseList = nullptr; ++ common::visit( ++ common::visitors{ ++ [&](const parser::OpenMPBlockConstruct &ompConstruct) { ++ const auto &beginDirective = ++ std::get(ompConstruct.t); ++ beginClauseList = ++ &std::get(beginDirective.t); ++ endClauseList = &std::get( ++ std::get(ompConstruct.t).t); ++ }, ++ [&](const parser::OpenMPLoopConstruct &ompConstruct) { ++ const auto &beginDirective = ++ std::get(ompConstruct.t); ++ beginClauseList = ++ &std::get(beginDirective.t); ++ ++ if (auto &endDirective = ++ std::get>( ++ ompConstruct.t)) ++ endClauseList = ++ &std::get(endDirective->t); ++ }, ++ [&](const auto &) {}}, ++ ompEval->u); ++ ++ assert(beginClauseList && "expected begin directive"); ++ clauses.append(makeClauses(*beginClauseList, semaCtx)); ++ ++ if (endClauseList) ++ clauses.append(makeClauses(*endClauseList, semaCtx)); ++ }; ++ ++ // Return the directive that is immediately nested inside of the given ++ // `parent` evaluation, if it is its only non-end-statement nested evaluation ++ // and it represents an OpenMP construct. ++ auto extractOnlyOmpNestedDir = [](lower::pft::Evaluation &parent) ++ -> std::optional { ++ if (!parent.hasNestedEvaluations()) ++ return std::nullopt; ++ ++ llvm::omp::Directive dir; ++ auto &nested = parent.getFirstNestedEvaluation(); ++ if (const auto *ompEval = nested.getIf()) ++ dir = extractOmpDirective(*ompEval); ++ else ++ return std::nullopt; ++ ++ for (auto &sibling : parent.getNestedEvaluations()) ++ if (&sibling != &nested && !sibling.isEndStmt()) ++ return std::nullopt; ++ ++ return dir; ++ }; ++ ++ // Process the given evaluation assuming it's part of a 'target' construct or ++ // captured by one, and store results in the global `hostEvalInfo`. ++ std::function &)> ++ processEval; ++ processEval = [&](lower::pft::Evaluation &eval, const List &clauses) { ++ using namespace llvm::omp; ++ ClauseProcessor cp(converter, semaCtx, clauses); ++ ++ // Call `processEval` recursively with the immediately nested evaluation and ++ // its corresponding clauses if there is a single nested evaluation ++ // representing an OpenMP directive that passes the given test. ++ auto processSingleNestedIf = [&](llvm::function_ref test) { ++ std::optional nestedDir = extractOnlyOmpNestedDir(eval); ++ if (!nestedDir || !test(*nestedDir)) ++ return; ++ ++ lower::pft::Evaluation &nestedEval = eval.getFirstNestedEvaluation(); ++ List nestedClauses; ++ extractClauses(nestedEval, nestedClauses); ++ processEval(nestedEval, nestedClauses); ++ }; ++ ++ const auto *ompEval = eval.getIf(); ++ if (!ompEval) ++ return; ++ ++ HostEvalInfo &hostInfo = hostEvalInfo.back(); ++ ++ switch (extractOmpDirective(*ompEval)) { ++ // Cases where 'teams' and target SPMD clauses might be present. 
++ case OMPD_teams_distribute_parallel_do: ++ case OMPD_teams_distribute_parallel_do_simd: ++ cp.processThreadLimit(stmtCtx, hostInfo.ops); ++ [[fallthrough]]; ++ case OMPD_target_teams_distribute_parallel_do: ++ case OMPD_target_teams_distribute_parallel_do_simd: ++ cp.processNumTeams(stmtCtx, hostInfo.ops); ++ [[fallthrough]]; ++ case OMPD_distribute_parallel_do: ++ case OMPD_distribute_parallel_do_simd: ++ cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv); ++ cp.processNumThreads(stmtCtx, hostInfo.ops); ++ break; ++ ++ // Cases where 'teams' clauses might be present, and target SPMD is ++ // possible by looking at nested evaluations. ++ case OMPD_teams: ++ cp.processThreadLimit(stmtCtx, hostInfo.ops); ++ [[fallthrough]]; ++ case OMPD_target_teams: ++ cp.processNumTeams(stmtCtx, hostInfo.ops); ++ processSingleNestedIf([](Directive nestedDir) { ++ return nestedDir == OMPD_distribute_parallel_do || ++ nestedDir == OMPD_distribute_parallel_do_simd; ++ }); ++ break; ++ ++ // Cases where only 'teams' host-evaluated clauses might be present. ++ case OMPD_teams_distribute: ++ case OMPD_teams_distribute_simd: ++ cp.processThreadLimit(stmtCtx, hostInfo.ops); ++ [[fallthrough]]; ++ case OMPD_target_teams_distribute: ++ case OMPD_target_teams_distribute_simd: ++ cp.processNumTeams(stmtCtx, hostInfo.ops); ++ break; ++ ++ // Standalone 'target' case. ++ case OMPD_target: { ++ processSingleNestedIf( ++ [](Directive nestedDir) { return topTeamsSet.test(nestedDir); }); ++ break; ++ } ++ default: ++ break; ++ } ++ }; ++ ++ assert(!hostEvalInfo.empty() && "expected HOST_EVAL info structure"); ++ ++ const auto *ompEval = eval.getIf(); ++ assert(ompEval && ++ llvm::omp::allTargetSet.test(extractOmpDirective(*ompEval)) && ++ "expected TARGET construct evaluation"); ++ ++ // Use the whole list of clauses passed to the construct here, rather than the ++ // ones only applied to omp.target. ++ List clauses; ++ extractClauses(eval, clauses); ++ processEval(eval, clauses); ++} ++ + static lower::pft::Evaluation * + getCollapsedLoopEval(lower::pft::Evaluation &eval, int collapseValue) { + // Return the Evaluation of the innermost collapsed loop, or the current one +@@ -639,11 +1023,11 @@ + + llvm::SmallVector types; + llvm::SmallVector locs; +- unsigned numVars = args.inReduction.vars.size() + args.map.vars.size() + +- args.priv.vars.size() + args.reduction.vars.size() + +- args.taskReduction.vars.size() + +- args.useDeviceAddr.vars.size() + +- args.useDevicePtr.vars.size(); ++ unsigned numVars = ++ args.hostEvalVars.size() + args.inReduction.vars.size() + ++ args.map.vars.size() + args.priv.vars.size() + ++ args.reduction.vars.size() + args.taskReduction.vars.size() + ++ args.useDeviceAddr.vars.size() + args.useDevicePtr.vars.size(); + types.reserve(numVars); + locs.reserve(numVars); + +@@ -656,6 +1040,7 @@ + + // Populate block arguments in clause name alphabetical order to match + // expected order by the BlockArgOpenMPOpInterface. ++ extractTypeLoc(args.hostEvalVars); + extractTypeLoc(args.inReduction.vars); + extractTypeLoc(args.map.vars); + extractTypeLoc(args.priv.vars); +@@ -784,6 +1169,7 @@ + firOpBuilder.createBlock(&op.getRegion(0)); + return {}; + }(); ++ + // Mark the earliest insertion point. 
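
// --- Illustrative sketch (not part of the patch): the processHostEvalClauses
// switch earlier in this hunk uses [[fallthrough]] so that each combined
// construct accumulates the clauses evaluated on the host: 'teams distribute
// parallel do' picks up thread_limit, then num_teams, then the loop bounds and
// num_threads. The standalone analogue below reproduces only that accumulation
// pattern; the enum and flag names are illustrative and the clause sets are
// abbreviated.
#include <iostream>

enum class Dir {
  TeamsDistributeParallelDo,
  TargetTeamsDistributeParallelDo,
  DistributeParallelDo,
};

struct HostEvaluated {
  bool threadLimit = false;
  bool numTeams = false;
  bool numThreads = false;
  bool loopBounds = false;
};

HostEvaluated clausesEvaluatedOnHost(Dir dir) {
  HostEvaluated h;
  switch (dir) {
  case Dir::TeamsDistributeParallelDo:
    h.threadLimit = true;
    [[fallthrough]];
  case Dir::TargetTeamsDistributeParallelDo:
    h.numTeams = true;
    [[fallthrough]];
  case Dir::DistributeParallelDo:
    h.loopBounds = true; // collapse: lower/upper bounds and steps
    h.numThreads = true;
    break;
  }
  return h;
}

int main() {
  HostEvaluated h = clausesEvaluatedOnHost(Dir::TeamsDistributeParallelDo);
  std::cout << h.threadLimit << h.numTeams << h.numThreads << h.loopBounds
            << '\n'; // prints: 1111
}
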
+ mlir::Operation *marker = insertMarker(firOpBuilder); + +@@ -806,6 +1192,7 @@ + tempDsp.emplace(info.converter, info.semaCtx, *info.clauses, info.eval, + Fortran::lower::omp::isLastItemInQueue(item, queue)); + tempDsp->processStep1(); ++ tempDsp->processStep2(); + } + + if (info.dir == llvm::omp::Directive::OMPD_parallel) { +@@ -895,14 +1282,14 @@ - for (auto [argIndex, argSymbol] : -- llvm::enumerate(dsp.getAllSymbolsToPrivatize())) { -+ llvm::enumerate(dsp.getDelayedPrivSyms())) { - argIndex = mapSyms.size() + argIndex; + if (!info.dsp) { + assert(tempDsp.has_value()); +- tempDsp->processStep2(privatizationTopLevelOp, isLoop); ++ tempDsp->processStep3(privatizationTopLevelOp, isLoop); + } else { + if (isLoop && regionArgs.size() > 0) { + for (const auto ®ionArg : regionArgs) { + info.dsp->pushLoopIV(info.converter.getSymbolAddress(*regionArg)); + } + } +- info.dsp->processStep2(privatizationTopLevelOp, isLoop); ++ info.dsp->processStep3(privatizationTopLevelOp, isLoop); + } + } + } +@@ -997,7 +1384,13 @@ - const mlir::BlockArgument &arg = region.getArgument(argIndex); -@@ -962,7 +1039,7 @@ + mlir::Region ®ion = targetOp.getRegion(); + mlir::Block *entryBlock = genEntryBlock(converter, args, region); ++ ++ if (!enableDelayedPrivatizationStaging) ++ dsp.processStep2(); ++ + bindEntryBlockArgs(converter, targetOp, args); ++ if (!hostEvalInfo.empty()) ++ hostEvalInfo.back().bindOperands(argIface.getHostEvalBlockArgs()); + + // Check if cloning the bounds introduced any dependency on the outer region. + // If so, then either clone them as well if they are MemoryEffectFree, or else +@@ -1008,9 +1401,11 @@ + while (!valuesDefinedAbove.empty()) { + for (mlir::Value val : valuesDefinedAbove) { + mlir::Operation *valOp = val.getDefiningOp(); ++ assert(valOp != nullptr); + if (mlir::isMemoryEffectFree(valOp)) { + mlir::Operation *clonedOp = valOp->clone(); + entryBlock->push_front(clonedOp); ++ assert(clonedOp->getNumResults() == 1); + val.replaceUsesWithIf(clonedOp->getResult(0), + [entryBlock](mlir::OpOperand &use) { + return use.getOwner()->getBlock() == entryBlock; +@@ -1096,7 +1491,7 @@ genNestedEvaluations(converter, eval); } @@ -5152,70 +5689,72 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/OpenMP.cp } template -@@ -1026,20 +1103,28 @@ - static void genParallelClauses( - lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, - lower::StatementContext &stmtCtx, const List &clauses, -- mlir::Location loc, bool processReduction, -+ mlir::Location loc, bool evalOutsideTarget, - mlir::omp::ParallelClauseOps &clauseOps, -+ mlir::omp::NumThreadsClauseOps &numThreadsClauseOps, - llvm::SmallVectorImpl &reductionTypes, - llvm::SmallVectorImpl &reductionSyms) { +@@ -1173,7 +1568,10 @@ + mlir::Location loc, mlir::omp::LoopNestOperands &clauseOps, + llvm::SmallVectorImpl &iv) { + ClauseProcessor cp(converter, semaCtx, clauses); +- cp.processCollapse(loc, eval, clauseOps, iv); ++ ++ if (hostEvalInfo.empty() || !hostEvalInfo.back().apply(clauseOps, iv)) ++ cp.processCollapse(loc, eval, clauseOps, iv); ++ + clauseOps.loopInclusive = converter.getFirOpBuilder().getUnitAttr(); + } + +@@ -1215,7 +1613,10 @@ ClauseProcessor cp(converter, semaCtx, clauses); cp.processAllocate(clauseOps); - cp.processDefault(); cp.processIf(llvm::omp::Directive::OMPD_parallel, clauseOps); - cp.processNumThreads(stmtCtx, clauseOps); -- cp.processProcBind(clauseOps); - -- if (processReduction) { -- cp.processReduction(loc, clauseOps, &reductionTypes, &reductionSyms); -+ // Don't 
store num_threads clause operators into clauseOps because then they -+ // would always be added to the omp.parallel operation during its creation. -+ // We might need to attach them to the parent omp.target. -+ if (evalOutsideTarget) { -+ HostClausesInsertionGuard guard(converter.getFirOpBuilder()); -+ cp.processNumThreads(stmtCtx, numThreadsClauseOps); -+ } else { -+ cp.processNumThreads(stmtCtx, numThreadsClauseOps); - } + -+ cp.processProcBind(clauseOps); -+ cp.processReduction(loc, clauseOps, &reductionTypes, &reductionSyms); ++ if (hostEvalInfo.empty() || !hostEvalInfo.back().apply(clauseOps)) ++ cp.processNumThreads(stmtCtx, clauseOps); ++ + cp.processProcBind(clauseOps); + cp.processReduction(loc, clauseOps, reductionSyms); + } +@@ -1257,13 +1658,12 @@ + cp.processAllocate(clauseOps); + cp.processCopyprivate(loc, clauseOps); + cp.processNowait(clauseOps); +- // TODO Support delayed privatization. } - static void genSectionsClauses(lower::AbstractConverter &converter, -@@ -1083,7 +1168,7 @@ static void genTargetClauses( lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, - lower::StatementContext &stmtCtx, const List &clauses, -- mlir::Location loc, bool processHostOnlyClauses, bool processReduction, -+ mlir::Location loc, bool processHostOnlyClauses, - mlir::omp::TargetClauseOps &clauseOps, - llvm::SmallVectorImpl &mapSyms, - llvm::SmallVectorImpl &mapLocs, -@@ -1103,19 +1188,22 @@ - cp.processIsDevicePtr(clauseOps, devicePtrTypes, devicePtrLocs, - devicePtrSyms); - cp.processMap(loc, stmtCtx, clauseOps, &mapSyms, &mapLocs, &mapTypes); -- cp.processThreadLimit(stmtCtx, clauseOps); - - if (processHostOnlyClauses) - cp.processNowait(clauseOps); +- lower::StatementContext &stmtCtx, const List &clauses, +- mlir::Location loc, bool processHostOnlyClauses, ++ lower::StatementContext &stmtCtx, lower::pft::Evaluation &eval, ++ const List &clauses, mlir::Location loc, + mlir::omp::TargetOperands &clauseOps, + llvm::SmallVectorImpl &hasDeviceAddrSyms, + llvm::SmallVectorImpl &isDevicePtrSyms, +@@ -1272,22 +1672,27 @@ + cp.processDepend(clauseOps); + cp.processDevice(stmtCtx, clauseOps); + cp.processHasDeviceAddr(clauseOps, hasDeviceAddrSyms); ++ if (!hostEvalInfo.empty()) { ++ // Only process host_eval if compiling for the host device. ++ processHostEvalClauses(converter, semaCtx, stmtCtx, eval, loc); ++ hostEvalInfo.back().collectValues(clauseOps.hostEvalVars); ++ } + cp.processIf(llvm::omp::Directive::OMPD_target, clauseOps); + cp.processIsDevicePtr(clauseOps, isDevicePtrSyms); + cp.processMap(loc, stmtCtx, clauseOps, &mapSyms); +- +- if (processHostOnlyClauses) +- cp.processNowait(clauseOps); +- ++ cp.processNowait(clauseOps); + cp.processThreadLimit(stmtCtx, clauseOps); -+ cp.processThreadLimit(stmtCtx, clauseOps); -+ cp.processTODO(loc, -- llvm::omp::Directive::OMPD_target); -+ clause::InReduction, clause::UsesAllocators>( -+ loc, llvm::omp::Directive::OMPD_target); -+ + clause::InReduction, clause::UsesAllocators>( + loc, llvm::omp::Directive::OMPD_target); + + // TODO: Re-enable check after removing downstream early privatization support + // for `target`. - ++ // `target private(..)` is only supported in delayed privatization mode. 
- if (!enableDelayedPrivatizationStaging) - cp.processTODO(loc, llvm::omp::Directive::OMPD_target); @@ -5224,425 +5763,134 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/OpenMP.cp } static void genTargetDataClauses( -@@ -1207,11 +1295,14 @@ - loc, llvm::omp::Directive::OMPD_taskwait); +@@ -1347,7 +1752,6 @@ + cp.processMergeable(clauseOps); + cp.processPriority(stmtCtx, clauseOps); + cp.processUntied(clauseOps); +- // TODO Support delayed privatization. + + cp.processTODO(loc, llvm::omp::Directive::OMPD_task); +@@ -1381,19 +1785,21 @@ + cp.processNowait(clauseOps); } -static void genTeamsClauses(lower::AbstractConverter &converter, - semantics::SemanticsContext &semaCtx, - lower::StatementContext &stmtCtx, - const List &clauses, mlir::Location loc, -- mlir::omp::TeamsClauseOps &clauseOps) { -+static void -+genTeamsClauses(lower::AbstractConverter &converter, -+ semantics::SemanticsContext &semaCtx, -+ lower::StatementContext &stmtCtx, const List &clauses, -+ mlir::Location loc, bool evalOutsideTarget, -+ mlir::omp::TeamsClauseOps &clauseOps, -+ mlir::omp::NumTeamsClauseOps &numTeamsClauseOps, -+ mlir::omp::ThreadLimitClauseOps &threadLimitClauseOps) { +- mlir::omp::TeamsOperands &clauseOps) { ++static void genTeamsClauses( ++ lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, ++ lower::StatementContext &stmtCtx, const List &clauses, ++ mlir::Location loc, mlir::omp::TeamsOperands &clauseOps, ++ llvm::SmallVectorImpl &reductionSyms) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processAllocate(clauseOps); - cp.processDefault(); -@@ -1220,7 +1311,21 @@ - cp.processThreadLimit(stmtCtx, clauseOps); - // TODO Support delayed privatization. + cp.processIf(llvm::omp::Directive::OMPD_teams, clauseOps); +- cp.processNumTeams(stmtCtx, clauseOps); +- cp.processThreadLimit(stmtCtx, clauseOps); +- // TODO Support delayed privatization. - cp.processTODO(loc, llvm::omp::Directive::OMPD_teams); -+ // Evaluate NUM_TEAMS and THREAD_LIMIT on the host device, if currently inside -+ // of an omp.target operation. -+ // Don't store num_teams and thread_limit clause operators into clauseOps -+ // because then they would always be added to the omp.teams operation during -+ // its creation. We might need to attach them to the parent omp.target. 
-+ if (evalOutsideTarget) { -+ HostClausesInsertionGuard guard(converter.getFirOpBuilder()); -+ cp.processNumTeams(stmtCtx, numTeamsClauseOps); -+ cp.processThreadLimit(stmtCtx, threadLimitClauseOps); -+ } else { -+ cp.processNumTeams(stmtCtx, numTeamsClauseOps); -+ cp.processThreadLimit(stmtCtx, threadLimitClauseOps); ++ if (hostEvalInfo.empty() || !hostEvalInfo.back().apply(clauseOps)) { ++ cp.processNumTeams(stmtCtx, clauseOps); ++ cp.processThreadLimit(stmtCtx, clauseOps); + } + -+ // cp.processTODO(loc, llvm::omp::Directive::OMPD_teams); ++ cp.processReduction(loc, clauseOps, reductionSyms); } static void genWsloopClauses( -@@ -1284,12 +1389,24 @@ - } - - static mlir::omp::DistributeOp --genDistributeOp(lower::AbstractConverter &converter, lower::SymMap &symTable, -- semantics::SemanticsContext &semaCtx, -- lower::pft::Evaluation &eval, mlir::Location loc, -- const ConstructQueue &queue, ConstructQueue::iterator item) { -- TODO(loc, "Distribute construct"); -- return nullptr; -+genDistributeWrapperOp(lower::AbstractConverter &converter, -+ semantics::SemanticsContext &semaCtx, -+ lower::pft::Evaluation &eval, mlir::Location loc, -+ const mlir::omp::DistributeClauseOps &clauseOps, -+ DataSharingProcessor &dsp) { -+ fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); -+ -+ // Create omp.distribute wrapper. -+ auto distributeOp = -+ firOpBuilder.create(loc, clauseOps); -+ -+ // TODO: Populate entry block arguments with reduction variables. -+ firOpBuilder.createBlock(&distributeOp.getRegion()); -+ -+ firOpBuilder.setInsertionPoint( -+ lower::genOpenMPTerminator(firOpBuilder, distributeOp, loc)); -+ -+ return distributeOp; - } - - static mlir::omp::FlushOp -@@ -1305,6 +1422,55 @@ - converter.getCurrentLocation(), operandRange); - } - -+static mlir::omp::LoopNestOp -+genLoopNestOp(lower::AbstractConverter &converter, lower::SymMap &symTable, -+ semantics::SemanticsContext &semaCtx, -+ lower::pft::Evaluation &eval, mlir::Location loc, -+ const ConstructQueue &queue, ConstructQueue::iterator item, -+ mlir::omp::LoopNestClauseOps &clauseOps, -+ llvm::ArrayRef iv, -+ llvm::ArrayRef wrapperSyms, -+ llvm::ArrayRef wrapperArgs, -+ llvm::omp::Directive directive, DataSharingProcessor &dsp) { -+ fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); -+ -+ auto ivCallback = [&](mlir::Operation *op) { -+ genLoopVars(op, converter, loc, iv, wrapperSyms, wrapperArgs); -+ return llvm::SmallVector(iv); -+ }; -+ -+ auto *nestedEval = -+ getCollapsedLoopEval(eval, getCollapseValue(item->clauses)); +@@ -1478,6 +1884,7 @@ + std::pair> + wrapperArgs, + llvm::omp::Directive directive, DataSharingProcessor &dsp) { ++ + auto ivCallback = [&](mlir::Operation *op) { + genLoopVars(op, converter, loc, iv, wrapperArgs); + return llvm::SmallVector(iv); +@@ -1486,13 +1893,15 @@ + auto *nestedEval = + getCollapsedLoopEval(eval, getCollapseValue(item->clauses)); + +- return genOpWithBody( + auto loopNestOp = genOpWithBody( -+ OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *nestedEval, -+ directive) -+ .setClauses(&item->clauses) -+ .setDataSharingProcessor(&dsp) -+ .setGenRegionEntryCb(ivCallback), -+ queue, item, clauseOps); -+ -+ // Create trip_count if inside of omp.target and this is host compilation -+ auto offloadMod = llvm::dyn_cast( -+ firOpBuilder.getModule().getOperation()); -+ auto targetOp = loopNestOp->getParentOfType(); -+ -+ if (offloadMod && targetOp && !offloadMod.getIsTargetDevice() && -+ targetOp.isTargetSPMDLoop()) { -+ // Lower loop bounds and step, and process collapsing 
again, putting lowered -+ // values outside of omp.target this time. This enables calculating and -+ // accessing the trip count in the host, which is needed when lowering to -+ // LLVM IR via the OMPIRBuilder. -+ HostClausesInsertionGuard guard(firOpBuilder); -+ mlir::omp::CollapseClauseOps collapseClauseOps; -+ llvm::SmallVector iv; -+ ClauseProcessor cp(converter, semaCtx, item->clauses); -+ cp.processCollapse(loc, eval, collapseClauseOps, iv); -+ targetOp.getTripCountMutable().assign(calculateTripCount( -+ converter.getFirOpBuilder(), loc, collapseClauseOps)); -+ } -+ return loopNestOp; -+} -+ - static mlir::omp::MasterOp - genMasterOp(lower::AbstractConverter &converter, lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, -@@ -1344,40 +1510,56 @@ - semantics::SemanticsContext &semaCtx, - lower::pft::Evaluation &eval, mlir::Location loc, - const ConstructQueue &queue, ConstructQueue::iterator item, -- bool outerCombined = false) { -+ mlir::omp::ParallelClauseOps &clauseOps, -+ mlir::omp::NumThreadsClauseOps &numThreadsClauseOps, -+ llvm::ArrayRef reductionSyms, -+ llvm::ArrayRef reductionTypes, -+ mlir::omp::TargetOp parentTarget = nullptr) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); -- lower::StatementContext stmtCtx; -- mlir::omp::ParallelClauseOps clauseOps; -- llvm::SmallVector reductionTypes; -- llvm::SmallVector reductionSyms; -- genParallelClauses(converter, semaCtx, stmtCtx, item->clauses, loc, -- /*processReduction=*/!outerCombined, clauseOps, -- reductionTypes, reductionSyms); - - auto reductionCallback = [&](mlir::Operation *op) { -- genReductionVars(op, converter, loc, reductionSyms, reductionTypes); -- return reductionSyms; -+ llvm::SmallVector blockArgLocs(reductionSyms.size(), loc); -+ -+ mlir::Block *entryBlock = firOpBuilder.createBlock( -+ &op->getRegion(0), {}, reductionTypes, blockArgLocs); -+ -+ // Bind the reduction arguments to their block arguments. 
-+ for (auto [arg, prv] : -+ llvm::zip_equal(reductionSyms, entryBlock->getArguments())) -+ converter.bindSymbol(*arg, prv); -+ return llvm::SmallVector(reductionSyms); - }; - - OpWithBodyGenInfo genInfo = - OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, - llvm::omp::Directive::OMPD_parallel) -- .setOuterCombined(outerCombined) + OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *nestedEval, + directive) .setClauses(&item->clauses) - .setReductions(&reductionSyms, &reductionTypes) - .setGenRegionEntryCb(reductionCallback); - -- if (!enableDelayedPrivatization) -- return genOpWithBody(genInfo, queue, item, -- clauseOps); -+ if (!enableDelayedPrivatization) { -+ auto parallelOp = -+ genOpWithBody(genInfo, queue, item, clauseOps); -+ if (numThreadsClauseOps.numThreadsVar) { -+ if (parentTarget) -+ parentTarget.getNumThreadsMutable().assign( -+ numThreadsClauseOps.numThreadsVar); -+ else -+ parallelOp.getNumThreadsVarMutable().assign( -+ numThreadsClauseOps.numThreadsVar); -+ } -+ return parallelOp; -+ } + .setDataSharingProcessor(&dsp) + .setGenRegionEntryCb(ivCallback), + queue, item, clauseOps); ++ ++ return loopNestOp; + } -- bool privatize = !outerCombined; + static void genLoopOp(lower::AbstractConverter &converter, +@@ -1509,7 +1918,8 @@ DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, - lower::omp::isLastItemInQueue(item, queue), + /*shouldCollectPreDeterminedSymbols=*/true, /*useDelayedPrivatization=*/true, &symTable); +- dsp.processStep1(&loopClauseOps); + dsp.processStep1(); -+ dsp.processStep2(); - -- if (privatize) -- dsp.processStep1(&clauseOps); -+ const auto &privateClauseOps = dsp.getPrivateClauseOps(); -+ clauseOps.privateVars = privateClauseOps.privateVars; -+ clauseOps.privatizers = privateClauseOps.privatizers; - - auto genRegionEntryCB = [&](mlir::Operation *op) { - auto parallelOp = llvm::cast(op); -@@ -1386,7 +1568,7 @@ - clauseOps.reductionVars.size(), loc); - - llvm::SmallVector allRegionArgTypes; -- mergePrivateVarsInfo(parallelOp, llvm::ArrayRef(reductionTypes), -+ mergePrivateVarsInfo(parallelOp, reductionTypes, - llvm::function_ref{ - [](mlir::Value v) { return v.getType(); }}, - allRegionArgTypes); -@@ -1401,9 +1583,9 @@ - firOpBuilder.createBlock(®ion, /*insertPt=*/{}, allRegionArgTypes, - allRegionArgLocs); - -- llvm::SmallVector allSymbols = reductionSyms; -- allSymbols.append(dsp.getAllSymbolsToPrivatize().begin(), -- dsp.getAllSymbolsToPrivatize().end()); -+ llvm::SmallVector allSymbols(reductionSyms); -+ allSymbols.append(dsp.getDelayedPrivSyms().begin(), -+ dsp.getDelayedPrivSyms().end()); - - for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments())) { - converter.bindSymbol(*arg, hlfir::translateToExtendedValue( -@@ -1418,7 +1600,62 @@ - }; - - genInfo.setGenRegionEntryCb(genRegionEntryCB).setDataSharingProcessor(&dsp); -- return genOpWithBody(genInfo, queue, item, clauseOps); -+ auto parallelOp = -+ genOpWithBody(genInfo, queue, item, clauseOps); -+ if (numThreadsClauseOps.numThreadsVar) { -+ if (parentTarget) -+ parentTarget.getNumThreadsMutable().assign( -+ numThreadsClauseOps.numThreadsVar); -+ else -+ parallelOp.getNumThreadsVarMutable().assign( -+ numThreadsClauseOps.numThreadsVar); -+ } -+ return parallelOp; -+} -+ -+static mlir::omp::ParallelOp genParallelWrapperOp( -+ lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, -+ lower::pft::Evaluation &eval, mlir::Location loc, -+ const mlir::omp::ParallelClauseOps &clauseOps, -+ mlir::omp::NumThreadsClauseOps &numThreadsClauseOps, 
-+ llvm::ArrayRef reductionSyms, -+ llvm::ArrayRef reductionTypes, mlir::omp::TargetOp parentTarget, -+ DataSharingProcessor &dsp) { -+ fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); -+ -+ // Create omp.parallel wrapper. -+ auto parallelOp = firOpBuilder.create(loc, clauseOps); -+ -+ if (numThreadsClauseOps.numThreadsVar) { -+ if (parentTarget) -+ parentTarget.getNumThreadsMutable().assign( -+ numThreadsClauseOps.numThreadsVar); -+ else -+ parallelOp.getNumThreadsVarMutable().assign( -+ numThreadsClauseOps.numThreadsVar); -+ } -+ -+ // Populate entry block arguments with reduction and private variables. -+ mlir::OperandRange privateVars = parallelOp.getPrivateVars(); -+ -+ llvm::SmallVector blockArgTypes(reductionTypes.begin(), -+ reductionTypes.end()); -+ blockArgTypes.reserve(blockArgTypes.size() + privateVars.size()); -+ llvm::transform(privateVars, std::back_inserter(blockArgTypes), -+ [](mlir::Value v) { return v.getType(); }); -+ -+ llvm::SmallVector blockArgLocs(reductionTypes.size(), loc); -+ blockArgLocs.reserve(blockArgLocs.size() + privateVars.size()); -+ llvm::transform(privateVars, std::back_inserter(blockArgLocs), -+ [](mlir::Value v) { return v.getLoc(); }); -+ -+ firOpBuilder.createBlock(¶llelOp.getRegion(), {}, blockArgTypes, -+ blockArgLocs); -+ -+ firOpBuilder.setInsertionPoint( -+ lower::genOpenMPTerminator(firOpBuilder, parallelOp, loc)); -+ -+ return parallelOp; - } ++ dsp.processStep2(&loopClauseOps); - static mlir::omp::SectionOp -@@ -1443,13 +1680,15 @@ - mlir::omp::SectionsClauseOps clauseOps; - genSectionsClauses(converter, semaCtx, item->clauses, loc, clauseOps); - -- auto &builder = converter.getFirOpBuilder(); -+ auto &firOpBuilder = converter.getFirOpBuilder(); - - // Insert privatizations before SECTIONS - symTable.pushScope(); -+ // TODO: Add support for delayed privatization. + mlir::omp::LoopNestOperands loopNestClauseOps; + llvm::SmallVector iv; +@@ -1631,6 +2041,8 @@ DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, lower::omp::isLastItemInQueue(item, queue)); dsp.processStep1(); ++ // TODO: Add support for delayed privatization. 
+ dsp.processStep2(); List nonDsaClauses; List lastprivates; -@@ -1488,9 +1727,9 @@ - assert(lastSectionOp != body.rend()); - - for (const clause::Lastprivate *lastp : lastprivates) { -- builder.setInsertionPoint( -+ firOpBuilder.setInsertionPoint( - lastSectionOp->getRegion(0).back().getTerminator()); -- mlir::OpBuilder::InsertPoint insp = builder.saveInsertionPoint(); -+ mlir::OpBuilder::InsertPoint insp = firOpBuilder.saveInsertionPoint(); - const auto &objList = std::get(lastp->t); - for (const Object &object : objList) { - semantics::Symbol *sym = object.sym(); -@@ -1500,65 +1739,35 @@ - } +@@ -1687,8 +2099,8 @@ + } + + ConstructQueue sectionQueue{buildConstructQueue( +- converter.getFirOpBuilder().getModule(), semaCtx, nestedEval, +- sectionConstruct->source, llvm::omp::Directive::OMPD_section, {})}; ++ builder.getModule(), semaCtx, nestedEval, sectionConstruct->source, ++ llvm::omp::Directive::OMPD_section, {})}; + + builder.setInsertionPoint(terminator); + genOpWithBody( +@@ -1724,7 +2136,7 @@ // Perform DataSharingProcessor's step2 out of SECTIONS -- builder.setInsertionPointAfter(sectionsOp.getOperation()); + builder.setInsertionPointAfter(sectionsOp.getOperation()); - dsp.processStep2(sectionsOp, false); -+ firOpBuilder.setInsertionPointAfter(sectionsOp.getOperation()); + dsp.processStep3(sectionsOp, false); // Emit implicit barrier to synchronize threads and avoid data // races on post-update of lastprivate variables when `nowait` // clause is present. - if (clauseOps.nowaitAttr && !lastprivates.empty()) -- builder.create(loc); -+ firOpBuilder.create(loc); - - symTable.popScope(); - return sectionsOp; - } - --static mlir::omp::SimdOp --genSimdOp(lower::AbstractConverter &converter, lower::SymMap &symTable, -- semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, -- mlir::Location loc, const ConstructQueue &queue, -- ConstructQueue::iterator item) { -+static mlir::omp::SimdOp genSimdWrapperOp(lower::AbstractConverter &converter, -+ semantics::SemanticsContext &semaCtx, -+ lower::pft::Evaluation &eval, -+ mlir::Location loc, -+ mlir::omp::SimdClauseOps &clauseOps, -+ DataSharingProcessor &dsp) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); -- symTable.pushScope(); -- DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, -- lower::omp::isLastItemInQueue(item, queue)); -- dsp.processStep1(); -- -- lower::StatementContext stmtCtx; -- mlir::omp::LoopNestClauseOps loopClauseOps; -- mlir::omp::SimdClauseOps simdClauseOps; -- llvm::SmallVector iv; -- genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, -- loopClauseOps, iv); -- genSimdClauses(converter, semaCtx, item->clauses, loc, simdClauseOps); - - // Create omp.simd wrapper. -- auto simdOp = firOpBuilder.create(loc, simdClauseOps); -+ auto simdOp = firOpBuilder.create(loc, clauseOps); - -- // TODO: Add reduction-related arguments to the wrapper's entry block. -+ // TODO: Populate entry block arguments with reduction variables. - firOpBuilder.createBlock(&simdOp.getRegion()); -+ - firOpBuilder.setInsertionPoint( - lower::genOpenMPTerminator(firOpBuilder, simdOp, loc)); - -- // Create nested omp.loop_nest and fill body with loop contents. 
-- auto loopOp = firOpBuilder.create(loc, loopClauseOps); -- -- auto *nestedEval = -- getCollapsedLoopEval(eval, getCollapseValue(item->clauses)); -- -- auto ivCallback = [&](mlir::Operation *op) { -- genLoopVars(op, converter, loc, iv); -- return iv; -- }; -- -- createBodyOfOp(*loopOp, -- OpWithBodyGenInfo(converter, symTable, semaCtx, loc, -- *nestedEval, llvm::omp::Directive::OMPD_simd) -- .setClauses(&item->clauses) -- .setDataSharingProcessor(&dsp) -- .setGenRegionEntryCb(ivCallback), -- queue, item); -- -- symTable.popScope(); - return simdOp; - } - -@@ -1581,7 +1790,7 @@ - genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, - mlir::Location loc, const ConstructQueue &queue, -- ConstructQueue::iterator item, bool outerCombined = false) { -+ ConstructQueue::iterator item) { +@@ -1765,23 +2177,27 @@ + ConstructQueue::const_iterator item) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); lower::StatementContext stmtCtx; +- +- bool processHostOnlyClauses = +- !llvm::cast(*converter.getModuleOp()) +- .getIsTargetDevice(); ++ bool isTargetDevice = ++ llvm::cast(*converter.getModuleOp()) ++ .getIsTargetDevice(); ++ ++ // Introduce a new host_eval information structure for this target region. ++ if (!isTargetDevice) ++ hostEvalInfo.emplace_back(); + + mlir::omp::TargetOperands clauseOps; + llvm::SmallVector mapSyms, isDevicePtrSyms, + hasDeviceAddrSyms; +- genTargetClauses(converter, semaCtx, stmtCtx, item->clauses, loc, +- processHostOnlyClauses, clauseOps, hasDeviceAddrSyms, +- isDevicePtrSyms, mapSyms); ++ genTargetClauses(converter, semaCtx, stmtCtx, eval, item->clauses, loc, ++ clauseOps, hasDeviceAddrSyms, isDevicePtrSyms, mapSyms); -@@ -1595,17 +1804,23 @@ - llvm::SmallVector mapLocs, devicePtrLocs, deviceAddrLocs; - llvm::SmallVector mapTypes, devicePtrTypes, deviceAddrTypes; - genTargetClauses(converter, semaCtx, stmtCtx, item->clauses, loc, -- processHostOnlyClauses, /*processReduction=*/outerCombined, -- clauseOps, mapSyms, mapLocs, mapTypes, deviceAddrSyms, -- deviceAddrLocs, deviceAddrTypes, devicePtrSyms, -- devicePtrLocs, devicePtrTypes); -+ processHostOnlyClauses, clauseOps, mapSyms, mapLocs, -+ mapTypes, deviceAddrSyms, deviceAddrLocs, deviceAddrTypes, -+ devicePtrSyms, devicePtrLocs, devicePtrTypes); - -- llvm::SmallVector privateSyms; DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/ lower::omp::isLastItemInQueue(item, queue), @@ -5650,672 +5898,322 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/OpenMP.cp - dsp.processStep1(&clauseOps); + enableDelayedPrivatizationStaging, &symTable); + dsp.processStep1(); -+ -+ if (enableDelayedPrivatizationStaging) { -+ dsp.processStep2(); -+ -+ const auto &privateClauseOps = dsp.getPrivateClauseOps(); -+ clauseOps.privateVars = privateClauseOps.privateVars; -+ clauseOps.privatizers = privateClauseOps.privatizers; -+ } ++ if (enableDelayedPrivatizationStaging) ++ dsp.processStep2(&clauseOps); // 5.8.1 Implicit Data-Mapping Attribute Rules // The following code follows the implicit data-mapping rules to map all the -@@ -1700,6 +1915,7 @@ +@@ -1805,84 +2221,83 @@ + if (llvm::is_contained(mapSyms, common)) + return; + +- // If we come across a symbol without a symbol address, we +- // return as we cannot process it, this is intended as a +- // catch all early exit for symbols that do not have a +- // corresponding extended value. 
Such as subroutines, +- // interfaces and named blocks. ++ // If we come across a symbol without a symbol address, we return as we ++ // cannot process it, this is intended as a catch all early exit for ++ // symbols that do not have a corresponding extended value. Such as ++ // subroutines, interfaces and named blocks. + if (!converter.getSymbolAddress(sym)) + return; + +- if (!llvm::is_contained(mapSyms, &sym)) { +- if (const auto *details = +- sym.template detailsIf()) +- converter.copySymbolBinding(details->symbol(), sym); +- llvm::SmallVector bounds; +- std::stringstream name; +- fir::ExtendedValue dataExv = converter.getSymbolExtendedValue(sym); +- name << sym.name().ToString(); +- +- lower::AddrAndBoundsInfo info = getDataOperandBaseAddr( +- converter, firOpBuilder, sym, converter.getCurrentLocation()); +- mlir::Value baseOp = info.rawInput; +- if (mlir::isa(fir::unwrapRefType(baseOp.getType()))) +- bounds = lower::genBoundsOpsFromBox( +- firOpBuilder, converter.getCurrentLocation(), dataExv, info); +- if (mlir::isa(fir::unwrapRefType(baseOp.getType()))) { +- bool dataExvIsAssumedSize = +- semantics::IsAssumedSizeArray(sym.GetUltimate()); +- bounds = lower::genBaseBoundsOps( +- firOpBuilder, converter.getCurrentLocation(), dataExv, +- dataExvIsAssumedSize); +- } ++ if (llvm::is_contained(mapSyms, &sym)) ++ return; + +- llvm::omp::OpenMPOffloadMappingFlags mapFlag = +- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; +- mlir::omp::VariableCaptureKind captureKind = +- mlir::omp::VariableCaptureKind::ByRef; +- +- mlir::Type eleType = baseOp.getType(); +- if (auto refType = mlir::dyn_cast(baseOp.getType())) +- eleType = refType.getElementType(); +- +- // If a variable is specified in declare target link and if device +- // type is not specified as `nohost`, it needs to be mapped tofrom +- mlir::ModuleOp mod = firOpBuilder.getModule(); +- mlir::Operation *op = mod.lookupSymbol(converter.mangleName(sym)); +- auto declareTargetOp = +- llvm::dyn_cast_if_present(op); +- if (declareTargetOp && declareTargetOp.isDeclareTarget()) { +- if (declareTargetOp.getDeclareTargetCaptureClause() == +- mlir::omp::DeclareTargetCaptureClause::link && +- declareTargetOp.getDeclareTargetDeviceType() != +- mlir::omp::DeclareTargetDeviceType::nohost) { +- mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; +- mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; +- } +- } else if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) { +- captureKind = mlir::omp::VariableCaptureKind::ByCopy; +- } else if (!fir::isa_builtin_cptr_type(eleType)) { ++ if (const auto *details = ++ sym.template detailsIf()) ++ converter.copySymbolBinding(details->symbol(), sym); ++ llvm::SmallVector bounds; ++ std::stringstream name; ++ fir::ExtendedValue dataExv = converter.getSymbolExtendedValue(sym); ++ name << sym.name().ToString(); ++ ++ lower::AddrAndBoundsInfo info = getDataOperandBaseAddr( ++ converter, firOpBuilder, sym, converter.getCurrentLocation()); ++ mlir::Value baseOp = info.rawInput; ++ if (mlir::isa(fir::unwrapRefType(baseOp.getType()))) ++ bounds = lower::genBoundsOpsFromBox( ++ firOpBuilder, converter.getCurrentLocation(), dataExv, info); ++ if (mlir::isa(fir::unwrapRefType(baseOp.getType()))) { ++ bool dataExvIsAssumedSize = ++ semantics::IsAssumedSizeArray(sym.GetUltimate()); ++ bounds = lower::genBaseBoundsOps( ++ firOpBuilder, converter.getCurrentLocation(), dataExv, ++ dataExvIsAssumedSize); ++ } ++ ++ llvm::omp::OpenMPOffloadMappingFlags mapFlag = ++ 
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; ++ mlir::omp::VariableCaptureKind captureKind = ++ mlir::omp::VariableCaptureKind::ByRef; ++ ++ mlir::Type eleType = baseOp.getType(); ++ if (auto refType = mlir::dyn_cast(baseOp.getType())) ++ eleType = refType.getElementType(); ++ ++ // If a variable is specified in declare target link and if device ++ // type is not specified as `nohost`, it needs to be mapped tofrom ++ mlir::ModuleOp mod = firOpBuilder.getModule(); ++ mlir::Operation *op = mod.lookupSymbol(converter.mangleName(sym)); ++ auto declareTargetOp = ++ llvm::dyn_cast_if_present(op); ++ if (declareTargetOp && declareTargetOp.isDeclareTarget()) { ++ if (declareTargetOp.getDeclareTargetCaptureClause() == ++ mlir::omp::DeclareTargetCaptureClause::link && ++ declareTargetOp.getDeclareTargetDeviceType() != ++ mlir::omp::DeclareTargetDeviceType::nohost) { + mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; + mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; + } +- auto location = +- mlir::NameLoc::get(mlir::StringAttr::get(firOpBuilder.getContext(), +- sym.name().ToString()), +- baseOp.getLoc()); +- mlir::Value mapOp = createMapInfoOp( +- firOpBuilder, location, baseOp, /*varPtrPtr=*/mlir::Value{}, +- name.str(), bounds, /*members=*/{}, +- /*membersIndex=*/mlir::ArrayAttr{}, +- static_cast< +- std::underlying_type_t>( +- mapFlag), +- captureKind, baseOp.getType()); ++ } else if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) { ++ captureKind = mlir::omp::VariableCaptureKind::ByCopy; ++ } else if (!fir::isa_builtin_cptr_type(eleType)) { ++ mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; ++ mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; ++ } ++ auto location = mlir::NameLoc::get( ++ mlir::StringAttr::get(firOpBuilder.getContext(), sym.name().ToString()), ++ baseOp.getLoc()); ++ mlir::Value mapOp = createMapInfoOp( ++ firOpBuilder, location, baseOp, /*varPtrPtr=*/mlir::Value{}, name.str(), ++ bounds, /*members=*/{}, ++ /*membersIndex=*/mlir::ArrayAttr{}, ++ static_cast< ++ std::underlying_type_t>( ++ mapFlag), ++ captureKind, baseOp.getType()); + +- clauseOps.mapVars.push_back(mapOp); +- mapSyms.push_back(&sym); +- } ++ clauseOps.mapVars.push_back(mapOp); ++ mapSyms.push_back(&sym); }; lower::pft::visitAllSymbols(eval, captureImplicitMap); +@@ -1892,6 +2307,7 @@ + extractMappedBaseValues(clauseOps.mapVars, mapBaseValues); + + EntryBlockArgs args; ++ args.hostEvalVars = clauseOps.hostEvalVars; + // TODO: Add in_reduction syms and vars. + args.map.syms = mapSyms; + args.map.vars = mapBaseValues; +@@ -1900,6 +2316,10 @@ + + genBodyOfTargetOp(converter, symTable, semaCtx, eval, targetOp, args, loc, + queue, item, dsp); + - auto targetOp = firOpBuilder.create(loc, clauseOps); - genBodyOfTargetOp(converter, symTable, semaCtx, eval, targetOp, mapSyms, - mapLocs, mapTypes, dsp, loc, queue, item); -@@ -1788,11 +2004,10 @@ - queue, item, clauseOps); ++ // Remove the host_eval information structure created for this target region. 
++ if (!isTargetDevice) ++ hostEvalInfo.pop_back(); + return targetOp; } --static mlir::omp::TaskloopOp --genTaskloopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, -- semantics::SemanticsContext &semaCtx, -- lower::pft::Evaluation &eval, mlir::Location loc, -- const ConstructQueue &queue, ConstructQueue::iterator item) { -+static mlir::omp::TaskloopOp genTaskloopWrapperOp( -+ lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, -+ lower::pft::Evaluation &eval, mlir::Location loc, -+ const ConstructQueue &queue, ConstructQueue::iterator item) { - TODO(loc, "Taskloop construct"); - } +@@ -1982,7 +2402,8 @@ + DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, + lower::omp::isLastItemInQueue(item, queue), + /*useDelayedPrivatization=*/true, &symTable); +- dsp.processStep1(&clauseOps); ++ dsp.processStep1(); ++ dsp.processStep2(&clauseOps); -@@ -1819,72 +2034,187 @@ - genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, + EntryBlockArgs taskArgs; + taskArgs.priv.syms = dsp.getDelayedPrivSymbols(); +@@ -2066,14 +2487,33 @@ mlir::Location loc, const ConstructQueue &queue, -- ConstructQueue::iterator item, bool outerCombined = false) { -+ ConstructQueue::iterator item) { + ConstructQueue::const_iterator item) { lower::StatementContext stmtCtx; + -+ auto offloadModOp = llvm::cast( -+ converter.getModuleOp().getOperation()); -+ mlir::omp::TargetOp targetOp = -+ findParentTargetOp(converter.getFirOpBuilder()); -+ bool mustEvalOutsideTarget = targetOp && !offloadModOp.getIsTargetDevice(); -+ - mlir::omp::TeamsClauseOps clauseOps; + mlir::omp::TeamsOperands clauseOps; - genTeamsClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps); -+ mlir::omp::NumTeamsClauseOps numTeamsClauseOps; -+ mlir::omp::ThreadLimitClauseOps threadLimitClauseOps; -+ genTeamsClauses(converter, semaCtx, stmtCtx, item->clauses, loc, -+ mustEvalOutsideTarget, clauseOps, numTeamsClauseOps, -+ threadLimitClauseOps); ++ llvm::SmallVector reductionSyms; ++ genTeamsClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps, ++ reductionSyms); ++ ++ EntryBlockArgs args; ++ // TODO: Add private syms and vars. 
++ args.reduction.syms = reductionSyms; ++ args.reduction.vars = clauseOps.reductionVars; - return genOpWithBody( ++ auto genRegionEntryCB = [&](mlir::Operation *op) { ++ genEntryBlock(converter, args, op->getRegion(0)); ++ bindEntryBlockArgs( ++ converter, llvm::cast(op), args); ++ return llvm::to_vector(llvm::concat( ++ args.priv.syms, args.reduction.syms)); ++ }; ++ + auto teamsOp = genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, llvm::omp::Directive::OMPD_teams) -- .setOuterCombined(outerCombined) - .setClauses(&item->clauses), +- .setClauses(&item->clauses), ++ .setClauses(&item->clauses) ++ .setGenRegionEntryCb(genRegionEntryCB), queue, item, clauseOps); + -+ if (numTeamsClauseOps.numTeamsUpperVar) { -+ if (mustEvalOutsideTarget) -+ targetOp.getNumTeamsUpperMutable().assign( -+ numTeamsClauseOps.numTeamsUpperVar); -+ else -+ teamsOp.getNumTeamsUpperMutable().assign( -+ numTeamsClauseOps.numTeamsUpperVar); -+ } -+ -+ if (threadLimitClauseOps.threadLimitVar) { -+ if (mustEvalOutsideTarget) -+ targetOp.getTeamsThreadLimitMutable().assign( -+ threadLimitClauseOps.threadLimitVar); -+ else -+ teamsOp.getThreadLimitMutable().assign( -+ threadLimitClauseOps.threadLimitVar); -+ } -+ + return teamsOp; } --static mlir::omp::WsloopOp --genWsloopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, -- semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, -- mlir::Location loc, const ConstructQueue &queue, -- ConstructQueue::iterator item) { -+static mlir::omp::WsloopOp genWsloopWrapperOp( -+ lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, -+ lower::pft::Evaluation &eval, mlir::Location loc, -+ const mlir::omp::WsloopClauseOps &clauseOps, -+ llvm::ArrayRef reductionSyms, -+ llvm::ArrayRef reductionTypes, DataSharingProcessor &dsp) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); -- symTable.pushScope(); -- DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, -- lower::omp::isLastItemInQueue(item, queue)); -- dsp.processStep1(); - -- lower::StatementContext stmtCtx; -- mlir::omp::LoopNestClauseOps loopClauseOps; -- mlir::omp::WsloopClauseOps wsClauseOps; -- llvm::SmallVector iv; -- llvm::SmallVector reductionTypes; -- llvm::SmallVector reductionSyms; -- genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, -- loopClauseOps, iv); -- genWsloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc, wsClauseOps, -- reductionTypes, reductionSyms); -- -- // Create omp.wsloop wrapper and populate entry block arguments with reduction -- // variables. -- auto wsloopOp = firOpBuilder.create(loc, wsClauseOps); -+ // Create omp.wsloop wrapper. - llvm::SmallVector reductionLocs(reductionSyms.size(), loc); -- mlir::Block *wsloopEntryBlock = firOpBuilder.createBlock( -- &wsloopOp.getRegion(), {}, reductionTypes, reductionLocs); -+ auto wsloopOp = firOpBuilder.create(loc, clauseOps); -+ -+ // Populate entry block arguments with reduction variables. -+ firOpBuilder.createBlock(&wsloopOp.getRegion(), {}, reductionTypes, -+ reductionLocs); -+ - firOpBuilder.setInsertionPoint( - lower::genOpenMPTerminator(firOpBuilder, wsloopOp, loc)); - -- // Create nested omp.loop_nest and fill body with loop contents. 
-- auto loopOp = firOpBuilder.create(loc, loopClauseOps); -+ return wsloopOp; -+} + //===----------------------------------------------------------------------===// +@@ -2097,7 +2537,8 @@ + DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, + /*shouldCollectPreDeterminedSymbols=*/true, + enableDelayedPrivatizationStaging, &symTable); +- dsp.processStep1(&distributeClauseOps); ++ dsp.processStep1(); ++ dsp.processStep2(&distributeClauseOps); -- auto *nestedEval = -- getCollapsedLoopEval(eval, getCollapseValue(item->clauses)); -+//===----------------------------------------------------------------------===// -+// Code generation functions for the standalone version of constructs that can -+// be a leaf in a composite construct -+//===----------------------------------------------------------------------===// + mlir::omp::LoopNestOperands loopNestClauseOps; + llvm::SmallVector iv; +@@ -2122,7 +2563,6 @@ + const ConstructQueue &queue, + ConstructQueue::const_iterator item) { + lower::StatementContext stmtCtx; +- + mlir::omp::WsloopOperands wsloopClauseOps; + llvm::SmallVector wsloopReductionSyms; + genWsloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc, +@@ -2133,6 +2573,7 @@ + /*shouldCollectPreDeterminedSymbols=*/true, + /*useDelayedPrivatization=*/false, &symTable); + dsp.processStep1(); ++ dsp.processStep2(); -- auto ivCallback = [&](mlir::Operation *op) { -- genLoopVars(op, converter, loc, iv, reductionSyms, -- wsloopEntryBlock->getArguments()); -- return iv; -- }; -+static void genStandaloneDistribute( -+ lower::AbstractConverter &converter, lower::SymMap &symTable, -+ semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, -+ mlir::Location loc, const ConstructQueue &queue, -+ ConstructQueue::iterator item, DataSharingProcessor &dsp) { -+ mlir::omp::DistributeClauseOps distributeClauseOps; -+ // TODO: Process DISTRIBUTE clauses - -- createBodyOfOp(*loopOp, -- OpWithBodyGenInfo(converter, symTable, semaCtx, loc, -- *nestedEval, llvm::omp::Directive::OMPD_do) -- .setClauses(&item->clauses) -- .setDataSharingProcessor(&dsp) -- .setReductions(&reductionSyms, &reductionTypes) -- .setGenRegionEntryCb(ivCallback), -- queue, item); -- symTable.popScope(); -- return wsloopOp; -+ mlir::omp::LoopNestClauseOps loopNestClauseOps; -+ llvm::SmallVector iv; -+ genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, -+ loopNestClauseOps, iv); -+ -+ auto distributeOp = genDistributeWrapperOp(converter, semaCtx, eval, loc, -+ distributeClauseOps, dsp); -+ -+ genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, -+ loopNestClauseOps, iv, -+ /*wrapperSyms=*/{}, distributeOp.getRegion().getArguments(), -+ llvm::omp::Directive::OMPD_distribute, dsp); -+} -+ -+static void genStandaloneDo(lower::AbstractConverter &converter, -+ lower::SymMap &symTable, -+ semantics::SemanticsContext &semaCtx, -+ lower::pft::Evaluation &eval, mlir::Location loc, -+ const ConstructQueue &queue, -+ ConstructQueue::iterator item, -+ DataSharingProcessor &dsp) { -+ lower::StatementContext stmtCtx; -+ -+ mlir::omp::WsloopClauseOps wsloopClauseOps; -+ llvm::SmallVector reductionSyms; -+ llvm::SmallVector reductionTypes; -+ genWsloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc, -+ wsloopClauseOps, reductionTypes, reductionSyms); -+ -+ mlir::omp::LoopNestClauseOps loopNestClauseOps; -+ llvm::SmallVector iv; -+ genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, -+ loopNestClauseOps, iv); -+ -+ auto wsloopOp = -+ genWsloopWrapperOp(converter, semaCtx, 
eval, loc, wsloopClauseOps, -+ reductionSyms, reductionTypes, dsp); -+ -+ genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, -+ loopNestClauseOps, iv, reductionSyms, -+ wsloopOp.getRegion().getArguments(), -+ llvm::omp::Directive::OMPD_do, dsp); -+} -+ -+static void genStandaloneParallel(lower::AbstractConverter &converter, -+ lower::SymMap &symTable, -+ semantics::SemanticsContext &semaCtx, -+ lower::pft::Evaluation &eval, -+ mlir::Location loc, -+ const ConstructQueue &queue, -+ ConstructQueue::iterator item) { -+ lower::StatementContext stmtCtx; -+ -+ auto offloadModOp = -+ llvm::cast(*converter.getModuleOp()); -+ mlir::omp::TargetOp targetOp = -+ findParentTargetOp(converter.getFirOpBuilder()); -+ bool evalOutsideTarget = -+ targetOp && !offloadModOp.getIsTargetDevice() && !evalHasSiblings(eval); -+ -+ mlir::omp::ParallelClauseOps parallelClauseOps; -+ mlir::omp::NumThreadsClauseOps numThreadsClauseOps; -+ llvm::SmallVector reductionSyms; -+ llvm::SmallVector reductionTypes; -+ genParallelClauses(converter, semaCtx, stmtCtx, item->clauses, loc, -+ evalOutsideTarget, parallelClauseOps, numThreadsClauseOps, -+ reductionTypes, reductionSyms); -+ -+ genParallelOp(converter, symTable, semaCtx, eval, loc, queue, item, -+ parallelClauseOps, numThreadsClauseOps, reductionSyms, -+ reductionTypes, evalOutsideTarget ? targetOp : nullptr); -+} -+ -+static void genStandaloneSimd(lower::AbstractConverter &converter, -+ lower::SymMap &symTable, -+ semantics::SemanticsContext &semaCtx, -+ lower::pft::Evaluation &eval, mlir::Location loc, -+ const ConstructQueue &queue, -+ ConstructQueue::iterator item, -+ DataSharingProcessor &dsp) { -+ mlir::omp::SimdClauseOps simdClauseOps; -+ genSimdClauses(converter, semaCtx, item->clauses, loc, simdClauseOps); -+ -+ mlir::omp::LoopNestClauseOps loopNestClauseOps; -+ llvm::SmallVector iv; -+ genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, -+ loopNestClauseOps, iv); -+ -+ auto simdOp = -+ genSimdWrapperOp(converter, semaCtx, eval, loc, simdClauseOps, dsp); -+ -+ genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, -+ loopNestClauseOps, iv, -+ /*wrapperSyms=*/{}, simdOp.getRegion().getArguments(), -+ llvm::omp::Directive::OMPD_simd, dsp); -+} -+ -+static void genStandaloneTaskloop( -+ lower::AbstractConverter &converter, lower::SymMap &symTable, -+ semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, -+ mlir::Location loc, const ConstructQueue &queue, -+ ConstructQueue::iterator item, DataSharingProcessor &dsp) { -+ TODO(loc, "Taskloop construct"); - } + mlir::omp::LoopNestOperands loopNestClauseOps; + llvm::SmallVector iv; +@@ -2170,7 +2611,8 @@ + dsp.emplace(converter, semaCtx, item->clauses, eval, + lower::omp::isLastItemInQueue(item, queue), + /*useDelayedPrivatization=*/true, &symTable); +- dsp->processStep1(¶llelClauseOps); ++ dsp->processStep1(); ++ dsp->processStep2(¶llelClauseOps); + } - //===----------------------------------------------------------------------===// -@@ -1895,26 +2225,195 @@ - lower::AbstractConverter &converter, lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, - mlir::Location loc, const ConstructQueue &queue, -- ConstructQueue::iterator item) { -- TODO(loc, "Composite DISTRIBUTE PARALLEL DO"); -+ ConstructQueue::iterator item, DataSharingProcessor &dsp) { -+ lower::StatementContext stmtCtx; -+ -+ auto offloadModOp = -+ llvm::cast(*converter.getModuleOp()); -+ mlir::omp::TargetOp targetOp = -+ 
findParentTargetOp(converter.getFirOpBuilder()); -+ bool evalOutsideTarget = -+ targetOp && !offloadModOp.getIsTargetDevice() && !evalHasSiblings(eval); -+ -+ // Clause processing. -+ mlir::omp::DistributeClauseOps distributeClauseOps; -+ // TODO: Process DISTRIBUTE clauses -+ -+ mlir::omp::ParallelClauseOps parallelClauseOps; -+ mlir::omp::NumThreadsClauseOps numThreadsClauseOps; -+ llvm::SmallVector parallelReductionSyms; -+ llvm::SmallVector parallelReductionTypes; -+ genParallelClauses(converter, semaCtx, stmtCtx, item->clauses, loc, -+ /*evalOutsideTarget=*/evalOutsideTarget, parallelClauseOps, -+ numThreadsClauseOps, parallelReductionTypes, -+ parallelReductionSyms); -+ -+ const auto &privateClauseOps = dsp.getPrivateClauseOps(); -+ parallelClauseOps.privateVars = privateClauseOps.privateVars; -+ parallelClauseOps.privatizers = privateClauseOps.privatizers; -+ -+ mlir::omp::WsloopClauseOps wsloopClauseOps; -+ llvm::SmallVector wsloopReductionSyms; -+ llvm::SmallVector wsloopReductionTypes; -+ genWsloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc, -+ wsloopClauseOps, wsloopReductionTypes, wsloopReductionSyms); -+ -+ mlir::omp::LoopNestClauseOps loopNestClauseOps; -+ llvm::SmallVector iv; -+ genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, -+ loopNestClauseOps, iv); -+ -+ // Operation creation. -+ auto distributeOp = genDistributeWrapperOp(converter, semaCtx, eval, loc, -+ distributeClauseOps, dsp); -+ -+ auto parallelOp = genParallelWrapperOp( -+ converter, semaCtx, eval, loc, parallelClauseOps, numThreadsClauseOps, -+ parallelReductionSyms, parallelReductionTypes, -+ evalOutsideTarget ? targetOp : nullptr, dsp); -+ -+ auto wsloopOp = -+ genWsloopWrapperOp(converter, semaCtx, eval, loc, wsloopClauseOps, -+ wsloopReductionSyms, wsloopReductionTypes, dsp); -+ -+ // Construct wrapper entry block list and associated symbols. It is important -+ // that the symbol order and the block argument order match, so that the -+ // symbol-value bindings created are correct. -+ auto wrapperSyms = -+ llvm::to_vector(llvm::concat( -+ parallelReductionSyms, dsp.getDelayedPrivSyms(), -+ wsloopReductionSyms)); -+ -+ auto wrapperArgs = llvm::to_vector( -+ llvm::concat(distributeOp.getRegion().getArguments(), -+ parallelOp.getRegion().getArguments(), -+ wsloopOp.getRegion().getArguments())); -+ -+ assert(wrapperSyms.size() == wrapperArgs.size() && -+ "Number of symbols and wrapper block arguments must match"); -+ genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, -+ loopNestClauseOps, iv, wrapperSyms, wrapperArgs, -+ llvm::omp::Directive::OMPD_distribute_parallel_do, dsp); + EntryBlockArgs parallelArgs; +@@ -2181,7 +2623,8 @@ + parallelArgs.reduction.vars = parallelClauseOps.reductionVars; + genParallelOp(converter, symTable, semaCtx, eval, loc, queue, item, + parallelClauseOps, parallelArgs, +- enableDelayedPrivatization ? &dsp.value() : nullptr); ++ enableDelayedPrivatization ? 
&dsp.value() : nullptr, ++ /*isComposite=*/false); } - static void genCompositeDistributeParallelDoSimd( - lower::AbstractConverter &converter, lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, - mlir::Location loc, const ConstructQueue &queue, -- ConstructQueue::iterator item) { -- TODO(loc, "Composite DISTRIBUTE PARALLEL DO SIMD"); -+ ConstructQueue::iterator item, DataSharingProcessor &dsp) { -+ lower::StatementContext stmtCtx; -+ -+ auto offloadModOp = -+ llvm::cast(*converter.getModuleOp()); -+ mlir::omp::TargetOp targetOp = -+ findParentTargetOp(converter.getFirOpBuilder()); -+ bool evalOutsideTarget = -+ targetOp && !offloadModOp.getIsTargetDevice() && !evalHasSiblings(eval); -+ -+ // Clause processing. -+ mlir::omp::DistributeClauseOps distributeClauseOps; -+ // TODO: Process DISTRIBUTE clauses -+ -+ mlir::omp::ParallelClauseOps parallelClauseOps; -+ mlir::omp::NumThreadsClauseOps numThreadsClauseOps; -+ llvm::SmallVector parallelReductionSyms; -+ llvm::SmallVector parallelReductionTypes; -+ genParallelClauses(converter, semaCtx, stmtCtx, item->clauses, loc, -+ /*evalOutsideTarget=*/evalOutsideTarget, parallelClauseOps, -+ numThreadsClauseOps, parallelReductionTypes, -+ parallelReductionSyms); -+ -+ const auto &privateClauseOps = dsp.getPrivateClauseOps(); -+ parallelClauseOps.privateVars = privateClauseOps.privateVars; -+ parallelClauseOps.privatizers = privateClauseOps.privatizers; -+ -+ mlir::omp::WsloopClauseOps wsloopClauseOps; -+ llvm::SmallVector wsloopReductionSyms; -+ llvm::SmallVector wsloopReductionTypes; -+ genWsloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc, -+ wsloopClauseOps, wsloopReductionTypes, wsloopReductionSyms); -+ -+ mlir::omp::SimdClauseOps simdClauseOps; -+ genSimdClauses(converter, semaCtx, item->clauses, loc, simdClauseOps); -+ -+ mlir::omp::LoopNestClauseOps loopNestClauseOps; -+ llvm::SmallVector iv; -+ genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, -+ loopNestClauseOps, iv); -+ -+ // Operation creation. -+ auto distributeOp = genDistributeWrapperOp(converter, semaCtx, eval, loc, -+ distributeClauseOps, dsp); -+ -+ auto parallelOp = genParallelWrapperOp( -+ converter, semaCtx, eval, loc, parallelClauseOps, numThreadsClauseOps, -+ parallelReductionSyms, parallelReductionTypes, -+ evalOutsideTarget ? targetOp : nullptr, dsp); -+ -+ auto wsloopOp = -+ genWsloopWrapperOp(converter, semaCtx, eval, loc, wsloopClauseOps, -+ wsloopReductionSyms, wsloopReductionTypes, dsp); -+ -+ auto simdOp = -+ genSimdWrapperOp(converter, semaCtx, eval, loc, simdClauseOps, dsp); -+ -+ // Construct wrapper entry block list and associated symbols. It is important -+ // that the symbol order and the block argument order match, so that the -+ // symbol-value bindings created are correct. 
-+ auto wrapperSyms = -+ llvm::to_vector(llvm::concat( -+ parallelReductionSyms, dsp.getDelayedPrivSyms(), -+ wsloopReductionSyms)); -+ -+ auto wrapperArgs = llvm::to_vector(llvm::concat( -+ distributeOp.getRegion().getArguments(), -+ parallelOp.getRegion().getArguments(), -+ wsloopOp.getRegion().getArguments(), simdOp.getRegion().getArguments())); -+ -+ assert(wrapperSyms.size() == wrapperArgs.size() && -+ "Number of symbols and wrapper block arguments must match"); -+ genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, -+ loopNestClauseOps, iv, wrapperSyms, wrapperArgs, -+ llvm::omp::Directive::OMPD_distribute_parallel_do_simd, dsp); - } + static void genStandaloneSimd(lower::AbstractConverter &converter, +@@ -2200,6 +2643,7 @@ + /*shouldCollectPreDeterminedSymbols=*/true, + /*useDelayedPrivatization=*/false, &symTable); + dsp.processStep1(); ++ dsp.processStep2(); --static void genCompositeDistributeSimd(lower::AbstractConverter &converter, -- lower::SymMap &symTable, -- semantics::SemanticsContext &semaCtx, -- lower::pft::Evaluation &eval, -- mlir::Location loc, -- const ConstructQueue &queue, -- ConstructQueue::iterator item) { -- TODO(loc, "Composite DISTRIBUTE SIMD"); -+static void genCompositeDistributeSimd( -+ lower::AbstractConverter &converter, lower::SymMap &symTable, -+ semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, -+ mlir::Location loc, const ConstructQueue &queue, -+ ConstructQueue::iterator item, DataSharingProcessor &dsp) { -+ // Clause processing. -+ mlir::omp::DistributeClauseOps distributeClauseOps; -+ // TODO: Process DISTRIBUTE clauses -+ -+ mlir::omp::SimdClauseOps simdClauseOps; -+ genSimdClauses(converter, semaCtx, item->clauses, loc, simdClauseOps); -+ -+ mlir::omp::LoopNestClauseOps loopNestClauseOps; -+ llvm::SmallVector iv; -+ genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, -+ loopNestClauseOps, iv); -+ -+ // Operation creation. -+ auto distributeOp = genDistributeWrapperOp(converter, semaCtx, eval, loc, -+ distributeClauseOps, dsp); -+ -+ auto simdOp = -+ genSimdWrapperOp(converter, semaCtx, eval, loc, simdClauseOps, dsp); -+ -+ // Construct wrapper entry block list and associated symbols. It is important -+ // that the symbol order and the block argument order match, so that the -+ // symbol-value bindings created are correct. 
-+ auto wrapperArgs = llvm::to_vector( -+ llvm::concat(distributeOp.getRegion().getArguments(), -+ simdOp.getRegion().getArguments())); -+ -+ assert(wrapperArgs.empty() && -+ "Block args for omp.simd and omp.distribute currently not expected"); -+ genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, -+ loopNestClauseOps, iv, -+ /*wrapperSyms=*/{}, wrapperArgs, -+ llvm::omp::Directive::OMPD_distribute_simd, dsp); - } + mlir::omp::LoopNestOperands loopNestClauseOps; + llvm::SmallVector iv; +@@ -2253,7 +2697,8 @@ + DataSharingProcessor dsp(converter, semaCtx, doItem->clauses, eval, + /*shouldCollectPreDeterminedSymbols=*/true, + /*useDelayedPrivatization=*/true, &symTable); +- dsp.processStep1(¶llelClauseOps); ++ dsp.processStep1(); ++ dsp.processStep2(¶llelClauseOps); - static void genCompositeDoSimd(lower::AbstractConverter &converter, -@@ -1922,29 +2421,51 @@ - semantics::SemanticsContext &semaCtx, - lower::pft::Evaluation &eval, mlir::Location loc, - const ConstructQueue &queue, -- ConstructQueue::iterator item) { -- ClauseProcessor cp(converter, semaCtx, item->clauses); -- cp.processTODO( -- loc, llvm::omp::OMPD_do_simd); -- // TODO: Add support for vectorization - add vectorization hints inside loop -- // body. -- // OpenMP standard does not specify the length of vector instructions. -- // Currently we safely assume that for !$omp do simd pragma the SIMD length -- // is equal to 1 (i.e. we generate standard workshare loop). -- // When support for vectorization is enabled, then we need to add handling of -- // if clause. Currently if clause can be skipped because we always assume -- // SIMD length = 1. -- genWsloopOp(converter, symTable, semaCtx, eval, loc, queue, item); --} -- --static void genCompositeTaskloopSimd(lower::AbstractConverter &converter, -- lower::SymMap &symTable, -- semantics::SemanticsContext &semaCtx, -- lower::pft::Evaluation &eval, -- mlir::Location loc, -- const ConstructQueue &queue, -- ConstructQueue::iterator item) { -+ ConstructQueue::iterator item, -+ DataSharingProcessor &dsp) { -+ lower::StatementContext stmtCtx; -+ -+ // Clause processing. -+ mlir::omp::WsloopClauseOps wsloopClauseOps; -+ llvm::SmallVector wsloopReductionSyms; -+ llvm::SmallVector wsloopReductionTypes; -+ genWsloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc, -+ wsloopClauseOps, wsloopReductionTypes, wsloopReductionSyms); -+ -+ mlir::omp::SimdClauseOps simdClauseOps; -+ genSimdClauses(converter, semaCtx, item->clauses, loc, simdClauseOps); -+ -+ mlir::omp::LoopNestClauseOps loopNestClauseOps; -+ llvm::SmallVector iv; -+ genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, -+ loopNestClauseOps, iv); -+ -+ // Operation creation. -+ auto wsloopOp = -+ genWsloopWrapperOp(converter, semaCtx, eval, loc, wsloopClauseOps, -+ wsloopReductionSyms, wsloopReductionTypes, dsp); -+ -+ auto simdOp = -+ genSimdWrapperOp(converter, semaCtx, eval, loc, simdClauseOps, dsp); -+ -+ // Construct wrapper entry block list and associated symbols. It is important -+ // that the symbol order and the block argument order match, so that the -+ // symbol-value bindings created are correct. 
-+ auto wrapperArgs = llvm::to_vector(llvm::concat( -+ wsloopOp.getRegion().getArguments(), simdOp.getRegion().getArguments())); -+ -+ assert(wsloopReductionSyms.size() == wrapperArgs.size() && -+ "Number of symbols and wrapper block arguments must match"); -+ genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, -+ loopNestClauseOps, iv, wsloopReductionSyms, wrapperArgs, -+ llvm::omp::Directive::OMPD_do_simd, dsp); -+} -+ -+static void genCompositeTaskloopSimd( -+ lower::AbstractConverter &converter, lower::SymMap &symTable, -+ semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, -+ mlir::Location loc, const ConstructQueue &queue, -+ ConstructQueue::iterator item, DataSharingProcessor &dsp) { - TODO(loc, "Composite TASKLOOP SIMD"); - } + EntryBlockArgs parallelArgs; + parallelArgs.priv.syms = dsp.getDelayedPrivSymbols(); +@@ -2321,7 +2766,8 @@ + DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval, + /*shouldCollectPreDeterminedSymbols=*/true, + /*useDelayedPrivatization=*/true, &symTable); +- dsp.processStep1(¶llelClauseOps); ++ dsp.processStep1(); ++ dsp.processStep2(¶llelClauseOps); -@@ -1957,18 +2478,33 @@ - semantics::SemanticsContext &semaCtx, - lower::pft::Evaluation &eval, mlir::Location loc, - const ConstructQueue &queue, -- ConstructQueue::iterator item) { -+ ConstructQueue::iterator item, -+ DataSharingProcessor *dsp) { - assert(item != queue.end()); -+ bool firstLoopLeaf = !dsp && llvm::omp::getDirectiveAssociation(item->id) == -+ llvm::omp::Association::Loop; -+ -+ std::optional loopDsp; -+ if (firstLoopLeaf) { -+ symTable.pushScope(); -+ loopDsp.emplace(converter, semaCtx, item->clauses, eval, -+ /*shouldCollectPreDeterminedSymbols=*/true, -+ enableDelayedPrivatization, &symTable); -+ dsp = &*loopDsp; -+ dsp->processStep1(); -+ dsp->processStep2(); -+ } + EntryBlockArgs parallelArgs; + parallelArgs.priv.syms = dsp.getDelayedPrivSymbols(); +@@ -2410,6 +2856,7 @@ + /*shouldCollectPreDeterminedSymbols=*/true, + /*useDelayedPrivatization=*/false, &symTable); + dsp.processStep1(); ++ dsp.processStep2(); - switch (llvm::omp::Directive dir = item->id) { - case llvm::omp::Directive::OMPD_barrier: - genBarrierOp(converter, symTable, semaCtx, eval, loc, queue, item); - break; - case llvm::omp::Directive::OMPD_distribute: -- genDistributeOp(converter, symTable, semaCtx, eval, loc, queue, item); -+ genStandaloneDistribute(converter, symTable, semaCtx, eval, loc, queue, -+ item, *dsp); - break; - case llvm::omp::Directive::OMPD_do: -- genWsloopOp(converter, symTable, semaCtx, eval, loc, queue, item); -+ genStandaloneDo(converter, symTable, semaCtx, eval, loc, queue, item, *dsp); - break; - case llvm::omp::Directive::OMPD_loop: - case llvm::omp::Directive::OMPD_masked: -@@ -1982,8 +2518,7 @@ - genOrderedRegionOp(converter, symTable, semaCtx, eval, loc, queue, item); - break; - case llvm::omp::Directive::OMPD_parallel: -- genParallelOp(converter, symTable, semaCtx, eval, loc, queue, item, -- /*outerCombined=*/false); -+ genStandaloneParallel(converter, symTable, semaCtx, eval, loc, queue, item); - break; - case llvm::omp::Directive::OMPD_section: - genSectionOp(converter, symTable, semaCtx, eval, loc, queue, item); -@@ -1992,14 +2527,14 @@ - genSectionsOp(converter, symTable, semaCtx, eval, loc, queue, item); - break; - case llvm::omp::Directive::OMPD_simd: -- genSimdOp(converter, symTable, semaCtx, eval, loc, queue, item); -+ genStandaloneSimd(converter, symTable, semaCtx, eval, loc, queue, item, -+ *dsp); - break; - case 
llvm::omp::Directive::OMPD_single: - genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item); - break; - case llvm::omp::Directive::OMPD_target: -- genTargetOp(converter, symTable, semaCtx, eval, loc, queue, item, -- /*outerCombined=*/false); -+ genTargetOp(converter, symTable, semaCtx, eval, loc, queue, item); - break; - case llvm::omp::Directive::OMPD_target_data: - genTargetDataOp(converter, symTable, semaCtx, eval, loc, queue, item); -@@ -2023,7 +2558,8 @@ - genTaskgroupOp(converter, symTable, semaCtx, eval, loc, queue, item); - break; - case llvm::omp::Directive::OMPD_taskloop: -- genTaskloopOp(converter, symTable, semaCtx, eval, loc, queue, item); -+ genStandaloneTaskloop(converter, symTable, semaCtx, eval, loc, queue, item, -+ *dsp); - break; - case llvm::omp::Directive::OMPD_taskwait: - genTaskwaitOp(converter, symTable, semaCtx, eval, loc, queue, item); -@@ -2049,26 +2585,30 @@ - // Composite constructs - case llvm::omp::Directive::OMPD_distribute_parallel_do: - genCompositeDistributeParallelDo(converter, symTable, semaCtx, eval, loc, -- queue, item); -+ queue, item, *dsp); - break; - case llvm::omp::Directive::OMPD_distribute_parallel_do_simd: - genCompositeDistributeParallelDoSimd(converter, symTable, semaCtx, eval, -- loc, queue, item); -+ loc, queue, item, *dsp); - break; - case llvm::omp::Directive::OMPD_distribute_simd: - genCompositeDistributeSimd(converter, symTable, semaCtx, eval, loc, queue, -- item); -+ item, *dsp); - break; - case llvm::omp::Directive::OMPD_do_simd: -- genCompositeDoSimd(converter, symTable, semaCtx, eval, loc, queue, item); -+ genCompositeDoSimd(converter, symTable, semaCtx, eval, loc, queue, item, -+ *dsp); - break; - case llvm::omp::Directive::OMPD_taskloop_simd: - genCompositeTaskloopSimd(converter, symTable, semaCtx, eval, loc, queue, -- item); -+ item, *dsp); - break; - default: - break; - } -+ -+ if (firstLoopLeaf) -+ symTable.popScope(); - } + // Pass the innermost leaf construct's clauses because that's where COLLAPSE + // is placed by construct decomposition. +@@ -2467,6 +2914,7 @@ + /*shouldCollectPreDeterminedSymbols=*/true, + /*useDelayedPrivatization=*/false, &symTable); + dsp.processStep1(); ++ dsp.processStep2(); - //===----------------------------------------------------------------------===// -@@ -2389,6 +2929,7 @@ + // Pass the innermost leaf construct's clauses because that's where COLLAPSE + // is placed by construct decomposition. 
+@@ -3019,6 +3467,7 @@ ConstructQueue queue{ buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx, eval, source, directive, clauses)}; @@ -6323,47 +6221,17 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/OpenMP.cp genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue, queue.begin()); } -@@ -2416,6 +2957,7 @@ - std::get(sectionsConstruct.t); +@@ -3044,6 +3493,7 @@ + std::get(sectionsConstruct.t); clauses.append(makeClauses( std::get(endSectionsDirective.t), semaCtx)); + mlir::Location currentLocation = converter.getCurrentLocation(); llvm::omp::Directive directive = -@@ -2433,9 +2975,8 @@ - semantics::SemanticsContext &semaCtx, - lower::pft::Evaluation &eval, - const parser::OpenMPConstruct &ompConstruct) { -- std::visit( -- [&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); }, -- ompConstruct.u); -+ std::visit([&](auto &&s) { genOMP(converter, symTable, semaCtx, eval, s); }, -+ ompConstruct.u); - } - - //===----------------------------------------------------------------------===// -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/ReductionProcessor.cpp llvm-project/flang/lib/Lower/OpenMP/ReductionProcessor.cpp ---- llvm-project.orig/flang/lib/Lower/OpenMP/ReductionProcessor.cpp 2024-06-12 10:43:12.624210453 -0500 -+++ llvm-project/flang/lib/Lower/OpenMP/ReductionProcessor.cpp 2024-06-12 10:44:09.351614239 -0500 -@@ -70,6 +70,14 @@ - } - } - -+void ReductionProcessor::addReductionSym( -+ const omp::clause::Reduction &reduction, -+ llvm::SmallVectorImpl &symbols) { -+ const auto &objectList{std::get(reduction.t)}; -+ llvm::transform(objectList, std::back_inserter(symbols), -+ [](const Object &object) { return object.sym(); }); -+} -+ - bool ReductionProcessor::supportedIntrinsicProcReduction( - const omp::clause::ProcedureDesignator &pd) { - semantics::Symbol *sym = pd.v.sym(); -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/ReductionProcessor.h llvm-project/flang/lib/Lower/OpenMP/ReductionProcessor.h ---- llvm-project.orig/flang/lib/Lower/OpenMP/ReductionProcessor.h 2024-06-12 10:43:12.624210453 -0500 -+++ llvm-project/flang/lib/Lower/OpenMP/ReductionProcessor.h 2024-06-12 10:44:09.351614239 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Lower/OpenMP/ReductionProcessor.h llvm-project-aso/flang/lib/Lower/OpenMP/ReductionProcessor.h +--- llvm-project-aso-orig/flang/lib/Lower/OpenMP/ReductionProcessor.h 2024-10-18 17:40:32.496992373 -0500 ++++ llvm-project-aso/flang/lib/Lower/OpenMP/ReductionProcessor.h 2024-11-23 20:39:47.180175366 -0600 @@ -13,10 +13,9 @@ #ifndef FORTRAN_LOWER_REDUCTIONPROCESSOR_H #define FORTRAN_LOWER_REDUCTIONPROCESSOR_H @@ -6376,21 +6244,10 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Reduction #include "flang/Semantics/symbol.h" #include "flang/Semantics/type.h" #include "mlir/IR/Location.h" -@@ -108,6 +107,10 @@ - mlir::Type type, mlir::Value op1, - mlir::Value op2); - -+ static void addReductionSym( -+ const omp::clause::Reduction &reduction, -+ llvm::SmallVectorImpl &symbols); -+ - /// Creates an OpenMP reduction declaration and inserts it into the provided - /// symbol table. The declaration has a constant initializer with the neutral - /// value `initValue`, and the reduction combiner carried over from `reduce`. 
-diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Utils.cpp llvm-project/flang/lib/Lower/OpenMP/Utils.cpp ---- llvm-project.orig/flang/lib/Lower/OpenMP/Utils.cpp 2024-06-12 10:43:12.624210453 -0500 -+++ llvm-project/flang/lib/Lower/OpenMP/Utils.cpp 2024-06-12 10:44:09.351614239 -0500 -@@ -10,17 +10,18 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Lower/OpenMP/Utils.cpp llvm-project-aso/flang/lib/Lower/OpenMP/Utils.cpp +--- llvm-project-aso-orig/flang/lib/Lower/OpenMP/Utils.cpp 2024-11-23 20:25:26.839275178 -0600 ++++ llvm-project-aso/flang/lib/Lower/OpenMP/Utils.cpp 2024-11-23 20:39:47.180175366 -0600 +@@ -10,20 +10,26 @@ // //===----------------------------------------------------------------------===// @@ -6398,132 +6255,31 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Utils.cpp +#include -#include "Clauses.h" + #include + ++#include #include ++#include #include +#include #include ++#include ++#include #include + #include #include #include #include #include ++#include +#include - #include - #include -@@ -349,6 +350,108 @@ - return sym; - } + #include -+mlir::omp::MapInfoOp -+createMapInfoOp(mlir::OpBuilder &builder, mlir::Location loc, -+ mlir::Value baseAddr, mlir::Value varPtrPtr, std::string name, -+ llvm::ArrayRef bounds, -+ llvm::ArrayRef members, -+ mlir::DenseIntElementsAttr membersIndex, uint64_t mapType, -+ mlir::omp::VariableCaptureKind mapCaptureType, mlir::Type retTy, -+ bool partialMap) { -+ if (auto boxTy = llvm::dyn_cast(baseAddr.getType())) { -+ baseAddr = builder.create(loc, baseAddr); -+ retTy = baseAddr.getType(); -+ } -+ -+ mlir::TypeAttr varType = mlir::TypeAttr::get( -+ llvm::cast(retTy).getElementType()); -+ -+ mlir::omp::MapInfoOp op = builder.create( -+ loc, retTy, baseAddr, varType, varPtrPtr, members, membersIndex, bounds, -+ builder.getIntegerAttr(builder.getIntegerType(64, false), mapType), -+ builder.getAttr(mapCaptureType), -+ builder.getStringAttr(name), builder.getBoolAttr(partialMap)); -+ -+ return op; -+} -+ -+mlir::Value calculateTripCount(fir::FirOpBuilder &builder, mlir::Location loc, -+ const mlir::omp::CollapseClauseOps &ops) { -+ using namespace mlir::arith; -+ assert(ops.loopLBVar.size() == ops.loopUBVar.size() && -+ ops.loopLBVar.size() == ops.loopStepVar.size() && -+ !ops.loopLBVar.empty() && "Invalid bounds or step"); -+ -+ // Get the bit width of an integer-like type. -+ auto widthOf = [](mlir::Type ty) -> unsigned { -+ if (mlir::isa(ty)) { -+ return mlir::IndexType::kInternalStorageBitWidth; -+ } -+ if (auto tyInt = mlir::dyn_cast(ty)) { -+ return tyInt.getWidth(); -+ } -+ llvm_unreachable("Unexpected type"); -+ }; -+ -+ // For a type that is either IntegerType or IndexType, return the -+ // equivalent IntegerType. In the former case this is a no-op. -+ auto asIntTy = [&](mlir::Type ty) -> mlir::IntegerType { -+ if (ty.isIndex()) { -+ return mlir::IntegerType::get(ty.getContext(), widthOf(ty)); -+ } -+ assert(ty.isIntOrIndex() && "Unexpected type"); -+ return mlir::cast(ty); -+ }; -+ -+ // For two given values, establish a common signless IntegerType -+ // that can represent any value of type of x and of type of y, -+ // and return the pair of x, y converted to the new type. 
-+ auto unifyToSignless = -+ [&](fir::FirOpBuilder &b, mlir::Value x, -+ mlir::Value y) -> std::pair { -+ auto tyX = asIntTy(x.getType()), tyY = asIntTy(y.getType()); -+ unsigned width = std::max(widthOf(tyX), widthOf(tyY)); -+ auto wideTy = mlir::IntegerType::get(b.getContext(), width, -+ mlir::IntegerType::Signless); -+ return std::make_pair(b.createConvert(loc, wideTy, x), -+ b.createConvert(loc, wideTy, y)); -+ }; -+ -+ // Start with signless i32 by default. -+ auto tripCount = builder.createIntegerConstant(loc, builder.getI32Type(), 1); -+ -+ for (auto [origLb, origUb, origStep] : -+ llvm::zip(ops.loopLBVar, ops.loopUBVar, ops.loopStepVar)) { -+ auto tmpS0 = builder.createIntegerConstant(loc, origStep.getType(), 0); -+ auto [step, step0] = unifyToSignless(builder, origStep, tmpS0); -+ auto reverseCond = -+ builder.create(loc, CmpIPredicate::slt, step, step0); -+ auto negStep = builder.create(loc, step0, step); -+ mlir::Value absStep = -+ builder.create(loc, reverseCond, negStep, step); -+ -+ auto [lb, ub] = unifyToSignless(builder, origLb, origUb); -+ auto start = builder.create(loc, reverseCond, ub, lb); -+ auto end = builder.create(loc, reverseCond, lb, ub); -+ -+ mlir::Value range = builder.create(loc, end, start); -+ auto rangeCond = -+ builder.create(loc, CmpIPredicate::slt, end, start); -+ std::tie(range, absStep) = unifyToSignless(builder, range, absStep); -+ // numSteps = (range /u absStep) + 1 -+ auto numSteps = builder.create( -+ loc, builder.create(loc, range, absStep), -+ builder.createIntegerConstant(loc, range.getType(), 1)); -+ -+ auto trip0 = builder.createIntegerConstant(loc, numSteps.getType(), 0); -+ auto loopTripCount = -+ builder.create(loc, rangeCond, trip0, numSteps); -+ auto [totalTC, thisTC] = unifyToSignless(builder, tripCount, loopTripCount); -+ tripCount = builder.create(loc, totalTC, thisTC); -+ } -+ -+ return tripCount; -+} - } // namespace omp - } // namespace lower - } // namespace Fortran -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Utils.h llvm-project/flang/lib/Lower/OpenMP/Utils.h ---- llvm-project.orig/flang/lib/Lower/OpenMP/Utils.h 2024-06-12 10:43:12.624210453 -0500 -+++ llvm-project/flang/lib/Lower/OpenMP/Utils.h 1969-12-31 18:00:00.000000000 -0600 -@@ -1,107 +0,0 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Lower/OpenMP/Utils.h llvm-project-aso/flang/lib/Lower/OpenMP/Utils.h +--- llvm-project-aso-orig/flang/lib/Lower/OpenMP/Utils.h 2024-11-23 20:25:26.839275178 -0600 ++++ llvm-project-aso/flang/lib/Lower/OpenMP/Utils.h 1969-12-31 18:00:00.000000000 -0600 +@@ -1,169 +0,0 @@ -//===-- Lower/OpenMP/Utils.h ------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. @@ -6540,6 +6296,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Utils.h l -#include "mlir/IR/Location.h" -#include "mlir/IR/Value.h" -#include "llvm/Support/CommandLine.h" +-#include - -extern llvm::cl::opt treatIndexAsSection; -extern llvm::cl::opt enableDelayedPrivatization; @@ -6560,6 +6317,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Utils.h l -} // namespace parser - -namespace lower { +-class StatementContext; -namespace pft { -struct Evaluation; -} @@ -6575,38 +6333,97 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Utils.h l -// and index data when lowering OpenMP map clauses. 
Keeps track of the -// placement of the component in the derived type hierarchy it rests within, -// alongside the generated mlir::omp::MapInfoOp for the mapped component. --struct OmpMapMemberIndicesData { +-// +-// As an example of what the contents of this data structure may be like, +-// when provided the following derived type and map of that type: +-// +-// type :: bottom_layer +-// real(8) :: i2 +-// real(4) :: array_i2(10) +-// real(4) :: array_j2(10) +-// end type bottom_layer +-// +-// type :: top_layer +-// real(4) :: i +-// integer(4) :: array_i(10) +-// real(4) :: j +-// type(bottom_layer) :: nested +-// integer, allocatable :: array_j(:) +-// integer(4) :: k +-// end type top_layer +-// +-// type(top_layer) :: top_dtype +-// +-// map(tofrom: top_dtype%nested%i2, top_dtype%k, top_dtype%nested%array_i2) +-// +-// We would end up with an OmpMapParentAndMemberData populated like below: +-// +-// memberPlacementIndices: +-// Vector 1: 3, 0 +-// Vector 2: 5 +-// Vector 3: 3, 1 +-// +-// memberMap: +-// Entry 1: omp.map.info for "top_dtype%nested%i2" +-// Entry 2: omp.map.info for "top_dtype%k" +-// Entry 3: omp.map.info for "top_dtype%nested%array_i2" +-// +-// And this OmpMapParentAndMemberData would be accessed via the parent +-// symbol for top_dtype. Other parent derived type instances that have +-// members mapped would have there own OmpMapParentAndMemberData entry +-// accessed via their own symbol. +-struct OmpMapParentAndMemberData { - // The indices representing the component members placement in its derived - // type parents hierarchy. -- llvm::SmallVector memberPlacementIndices; +- llvm::SmallVector> memberPlacementIndices; - - // Placement of the member in the member vector. -- mlir::omp::MapInfoOp memberMap; +- llvm::SmallVector memberMap; +- +- bool isDuplicateMemberMapInfo(llvm::SmallVectorImpl &memberIndices) { +- return llvm::find_if(memberPlacementIndices, [&](auto &memberData) { +- return llvm::equal(memberIndices, memberData); +- }) != memberPlacementIndices.end(); +- } +- +- void addChildIndexAndMapToParent(const omp::Object &object, +- mlir::omp::MapInfoOp &mapOp, +- semantics::SemanticsContext &semaCtx); -}; - -mlir::omp::MapInfoOp -createMapInfoOp(fir::FirOpBuilder &builder, mlir::Location loc, -- mlir::Value baseAddr, mlir::Value varPtrPtr, std::string name, -- mlir::ArrayRef bounds, -- mlir::ArrayRef members, -- mlir::DenseIntElementsAttr membersIndex, uint64_t mapType, +- mlir::Value baseAddr, mlir::Value varPtrPtr, +- llvm::StringRef name, llvm::ArrayRef bounds, +- llvm::ArrayRef members, +- mlir::ArrayAttr membersIndex, uint64_t mapType, - mlir::omp::VariableCaptureKind mapCaptureType, mlir::Type retTy, - bool partialMap = false); - --void addChildIndexAndMapToParent( -- const omp::Object &object, -- std::map> &parentMemberIndices, -- mlir::omp::MapInfoOp &mapOp, semantics::SemanticsContext &semaCtx); -- -void insertChildMapInfoIntoParent( -- lower::AbstractConverter &converter, -- std::map> &parentMemberIndices, +- Fortran::lower::AbstractConverter &converter, +- Fortran::semantics::SemanticsContext &semaCtx, +- Fortran::lower::StatementContext &stmtCtx, +- std::map &parentMemberIndices, - llvm::SmallVectorImpl &mapOperands, -- llvm::SmallVectorImpl &mapSyms, -- llvm::SmallVectorImpl *mapSymTypes, -- llvm::SmallVectorImpl *mapSymLocs); +- llvm::SmallVectorImpl &mapSyms); +- +-void generateMemberPlacementIndices( +- const Object &object, llvm::SmallVectorImpl &indices, +- Fortran::semantics::SemanticsContext &semaCtx); +- +-bool 
isMemberOrParentAllocatableOrPointer( +- const Object &object, Fortran::semantics::SemanticsContext &semaCtx); +- +-mlir::Value createParentSymAndGenIntermediateMaps( +- mlir::Location clauseLocation, Fortran::lower::AbstractConverter &converter, +- semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, +- omp::ObjectList &objectList, llvm::SmallVectorImpl &indices, +- OmpMapParentAndMemberData &parentMemberIndices, llvm::StringRef asFortran, +- llvm::omp::OpenMPOffloadMappingFlags mapTypeBits); +- +-omp::ObjectList gatherObjectsOf(omp::Object derivedTypeMember, +- semantics::SemanticsContext &semaCtx); - -mlir::Type getLoopVarType(lower::AbstractConverter &converter, - std::size_t loopVarTypeSize); @@ -6620,115 +6437,43 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Lower/OpenMP/Utils.h l - -int64_t getCollapseValue(const List &clauses); - --semantics::Symbol *getOmpObjectSymbol(const parser::OmpObject &ompObject); -- -void genObjectList(const ObjectList &objects, - lower::AbstractConverter &converter, - llvm::SmallVectorImpl &operands); - +-void lastprivateModifierNotSupported(const omp::clause::Lastprivate &lastp, +- mlir::Location loc); +- -} // namespace omp -} // namespace lower -} // namespace Fortran - -#endif // FORTRAN_LOWER_OPENMPUTILS_H -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Builder/FIRBuilder.cpp llvm-project/flang/lib/Optimizer/Builder/FIRBuilder.cpp ---- llvm-project.orig/flang/lib/Optimizer/Builder/FIRBuilder.cpp 2024-06-12 10:43:12.624210453 -0500 -+++ llvm-project/flang/lib/Optimizer/Builder/FIRBuilder.cpp 2024-06-12 10:44:09.351614239 -0500 -@@ -250,7 +250,37 @@ - if (auto ompOutlineableIface = - getRegion() - .getParentOfType()) { -- return ompOutlineableIface.getAllocaBlock(); -+ // omp.parallel can work as a block construct but it can also be a loop -+ // wrapper when part of a composite construct. Make sure it's only treated -+ // as a block if it's not a wrapper. -+ auto parallelOp = -+ llvm::dyn_cast(*ompOutlineableIface); -+ if (!parallelOp || !llvm::isa_and_present( -+ parallelOp->getParentOp())) -+ return ompOutlineableIface.getAllocaBlock(); -+ } -+ -+ // All allocations associated with an OpenMP loop wrapper must happen outside -+ // of all wrappers. -+ mlir::Operation *currentOp = getRegion().getParentOp(); -+ auto wrapperIface = -+ llvm::isa(currentOp) -+ ? llvm::cast( -+ currentOp->getParentOp()) -+ : llvm::dyn_cast(currentOp); -+ if (wrapperIface) { -+ // Cannot use LoopWrapperInterface methods here because the whole nest may -+ // not have been created at this point. Manually traverse parents instead. 
-+ mlir::omp::LoopWrapperInterface lastWrapperOp = wrapperIface; -+ while (true) { -+ if (auto nextWrapper = -+ llvm::dyn_cast_if_present( -+ lastWrapperOp->getParentOp())) -+ lastWrapperOp = nextWrapper; -+ else -+ break; -+ } -+ return &lastWrapperOp->getParentRegion()->front(); - } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Optimizer/OpenMP/CMakeLists.txt llvm-project-aso/flang/lib/Optimizer/OpenMP/CMakeLists.txt +--- llvm-project-aso-orig/flang/lib/Optimizer/OpenMP/CMakeLists.txt 2024-11-23 20:25:26.843275164 -0600 ++++ llvm-project-aso/flang/lib/Optimizer/OpenMP/CMakeLists.txt 2024-11-23 20:39:47.180175366 -0600 +@@ -1,7 +1,9 @@ + get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) - if (auto recipeIface = -@@ -267,9 +297,15 @@ - llvm::ArrayRef attrs) { - assert(!mlir::isa(type) && "cannot be a reference"); - // If the alloca is inside an OpenMP Op which will be outlined then pin -- // the alloca here. -- const bool pinned = -+ // the alloca here. Make sure that an omp.parallel operation that is taking -+ // a loop wrapper role is not detected as outlineable here. -+ auto iface = - getRegion().getParentOfType(); -+ auto parallelOp = -+ iface ? llvm::dyn_cast(*iface) : nullptr; -+ const bool pinned = -+ iface && (!parallelOp || !llvm::isa_and_present( -+ parallelOp->getParentOp())); - mlir::Value temp = - create(loc, type, /*unique_name=*/llvm::StringRef{}, name, - pinned, lenParams, shape, attrs); -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp llvm-project/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp ---- llvm-project.orig/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp 2024-06-12 10:43:12.628210411 -0500 -+++ llvm-project/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp 2024-06-12 10:44:09.351614239 -0500 -@@ -278,9 +278,16 @@ - // 3. The first ancestor that is an OpenMP Op or a LLVMFuncOp - mlir::Block * - ConvertFIRToLLVMPattern::getBlockForAllocaInsert(mlir::Operation *op) const { -- if (auto iface = mlir::dyn_cast(op)) -- return iface.getAllocaBlock(); -- if (auto llvmFuncOp = mlir::dyn_cast(op)) -+ if (auto iface = -+ mlir::dyn_cast(op)) { -+ // omp.parallel can work as a block construct but it can also be a loop -+ // wrapper when it's part of a composite construct. Make sure it's only -+ // treated as a block if it's not a wrapper. 
-+ auto parallelOp = llvm::dyn_cast(*iface); -+ if (!parallelOp || !llvm::isa_and_present( -+ parallelOp->getParentOp())) -+ return iface.getAllocaBlock(); -+ } else if (auto llvmFuncOp = mlir::dyn_cast(op)) - return &llvmFuncOp.front(); - - return getBlockForAllocaInsert(op->getParentOp()); -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/CMakeLists.txt llvm-project/flang/lib/Optimizer/Transforms/CMakeLists.txt ---- llvm-project.orig/flang/lib/Optimizer/Transforms/CMakeLists.txt 2024-06-12 10:43:12.632210369 -0500 -+++ llvm-project/flang/lib/Optimizer/Transforms/CMakeLists.txt 2024-06-12 10:44:09.351614239 -0500 -@@ -24,6 +24,7 @@ - VScaleAttr.cpp - FunctionAttr.cpp - DebugTypeGenerator.cpp + add_flang_library(FlangOpenMPTransforms + DoConcurrentConversion.cpp - - DEPENDS - FIRDialect -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp llvm-project/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp ---- llvm-project.orig/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp 1969-12-31 18:00:00.000000000 -0600 -+++ llvm-project/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp 2024-06-12 10:44:09.351614239 -0500 -@@ -0,0 +1,625 @@ + FunctionFiltering.cpp ++ GlobalFiltering.cpp + MapsForPrivatizedSymbols.cpp + MapInfoFinalization.cpp + MarkDeclareTarget.cpp +@@ -21,6 +23,7 @@ + FIRSupport + FortranCommon + MLIRFuncDialect ++ MLIRMathTransforms + MLIROpenMPDialect + HLFIRDialect + MLIRIR +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp llvm-project-aso/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp +--- llvm-project-aso-orig/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp 2024-11-23 20:39:47.180175366 -0600 +@@ -0,0 +1,1037 @@ +//===- DoConcurrentConversion.cpp -- map `DO CONCURRENT` to OpenMP loops --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
@@ -6745,8 +6490,11 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/D +#include "flang/Optimizer/Dialect/Support/FIRContext.h" +#include "flang/Optimizer/HLFIR/HLFIRDialect.h" +#include "flang/Optimizer/HLFIR/HLFIROps.h" -+#include "flang/Optimizer/Transforms/Passes.h" ++#include "flang/Optimizer/OpenMP/Passes.h" ++#include "mlir/Analysis/SliceAnalysis.h" ++#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" ++#include "mlir/Dialect/Math/IR/Math.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "mlir/IR/Diagnostics.h" +#include "mlir/IR/IRMapping.h" @@ -6755,15 +6503,17 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/D +#include "mlir/Transforms/RegionUtils.h" +#include "llvm/Frontend/OpenMP/OMPConstants.h" + ++#include +#include +#include + -+namespace fir { ++namespace flangomp { +#define GEN_PASS_DEF_DOCONCURRENTCONVERSIONPASS -+#include "flang/Optimizer/Transforms/Passes.h.inc" -+} // namespace fir ++#include "flang/Optimizer/OpenMP/Passes.h.inc" ++} // namespace flangomp + -+#define DEBUG_TYPE "fopenmp-do-concurrent-conversion" ++#define DEBUG_TYPE "do-concurrent-conversion" ++#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE << "]: ") + +namespace Fortran { +namespace lower { @@ -6772,14 +6522,12 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/D +// TODO The following 2 functions are copied from "flang/Lower/OpenMP/Utils.h". +// This duplication is temporary until we find a solution for a shared location +// for these utils that does not introduce circular CMake deps. -+mlir::omp::MapInfoOp -+createMapInfoOp(mlir::OpBuilder &builder, mlir::Location loc, -+ mlir::Value baseAddr, mlir::Value varPtrPtr, std::string name, -+ llvm::ArrayRef bounds, -+ llvm::ArrayRef members, -+ mlir::DenseIntElementsAttr membersIndex, uint64_t mapType, -+ mlir::omp::VariableCaptureKind mapCaptureType, mlir::Type retTy, -+ bool partialMap = false) { ++mlir::omp::MapInfoOp createMapInfoOp( ++ mlir::OpBuilder &builder, mlir::Location loc, mlir::Value baseAddr, ++ mlir::Value varPtrPtr, std::string name, llvm::ArrayRef bounds, ++ llvm::ArrayRef members, mlir::ArrayAttr membersIndex, ++ uint64_t mapType, mlir::omp::VariableCaptureKind mapCaptureType, ++ mlir::Type retTy, bool partialMap = false) { + if (auto boxTy = llvm::dyn_cast(baseAddr.getType())) { + baseAddr = builder.create(loc, baseAddr); + retTy = baseAddr.getType(); @@ -6788,6 +6536,13 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/D + mlir::TypeAttr varType = mlir::TypeAttr::get( + llvm::cast(retTy).getElementType()); + ++ // For types with unknown extents such as <2x?xi32> we discard the incomplete ++ // type info and only retain the base type. The correct dimensions are later ++ // recovered through the bounds info. 
++ if (auto seqType = llvm::dyn_cast(varType.getValue())) ++ if (seqType.hasDynamicExtents()) ++ varType = mlir::TypeAttr::get(seqType.getEleTy()); ++ + mlir::omp::MapInfoOp op = builder.create( + loc, retTy, baseAddr, varType, varPtrPtr, members, membersIndex, bounds, + builder.getIntegerAttr(builder.getIntegerType(64, false), mapType), @@ -6797,95 +6552,478 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/D + return op; +} + -+mlir::Value calculateTripCount(fir::FirOpBuilder &builder, mlir::Location loc, -+ const mlir::omp::CollapseClauseOps &ops) { -+ using namespace mlir::arith; -+ assert(ops.loopLBVar.size() == ops.loopUBVar.size() && -+ ops.loopLBVar.size() == ops.loopStepVar.size() && -+ !ops.loopLBVar.empty() && "Invalid bounds or step"); -+ -+ // Get the bit width of an integer-like type. -+ auto widthOf = [](mlir::Type ty) -> unsigned { -+ if (mlir::isa(ty)) { -+ return mlir::IndexType::kInternalStorageBitWidth; -+ } -+ if (auto tyInt = mlir::dyn_cast(ty)) { -+ return tyInt.getWidth(); ++/// Check if cloning the bounds introduced any dependency on the outer region. ++/// If so, then either clone them as well if they are MemoryEffectFree, or else ++/// copy them to a new temporary and add them to the map and block_argument ++/// lists and replace their uses with the new temporary. ++/// ++/// TODO: similar to the above functions, this is copied from OpenMP lowering ++/// (in this case, from `genBodyOfTargetOp`). Once we move to a common lib for ++/// these utils this will move as well. ++void cloneOrMapRegionOutsiders(fir::FirOpBuilder &builder, ++ mlir::omp::TargetOp targetOp) { ++ mlir::Region &targetRegion = targetOp.getRegion(); ++ mlir::Block *targetEntryBlock = &targetRegion.getBlocks().front(); ++ llvm::SetVector valuesDefinedAbove; ++ mlir::getUsedValuesDefinedAbove(targetRegion, valuesDefinedAbove); ++ ++ while (!valuesDefinedAbove.empty()) { ++ for (mlir::Value val : valuesDefinedAbove) { ++ mlir::Operation *valOp = val.getDefiningOp(); ++ assert(valOp != nullptr); ++ if (mlir::isMemoryEffectFree(valOp)) { ++ mlir::Operation *clonedOp = valOp->clone(); ++ targetEntryBlock->push_front(clonedOp); ++ assert(clonedOp->getNumResults() == 1); ++ val.replaceUsesWithIf( ++ clonedOp->getResult(0), [targetEntryBlock](mlir::OpOperand &use) { ++ return use.getOwner()->getBlock() == targetEntryBlock; ++ }); ++ } else { ++ mlir::OpBuilder::InsertionGuard guard(builder); ++ builder.setInsertionPointAfter(valOp); ++ auto copyVal = builder.createTemporary(val.getLoc(), val.getType()); ++ builder.createStoreWithConvert(copyVal.getLoc(), val, copyVal); ++ ++ llvm::SmallVector bounds; ++ std::stringstream name; ++ builder.setInsertionPoint(targetOp); ++ mlir::Value mapOp = createMapInfoOp( ++ builder, copyVal.getLoc(), copyVal, ++ /*varPtrPtr=*/mlir::Value{}, name.str(), bounds, ++ /*members=*/llvm::SmallVector{}, ++ /*membersIndex=*/mlir::ArrayAttr{}, ++ static_cast< ++ std::underlying_type_t>( ++ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT), ++ mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType()); ++ targetOp.getMapVarsMutable().append(mapOp); ++ mlir::Value clonedValArg = ++ targetRegion.addArgument(copyVal.getType(), copyVal.getLoc()); ++ builder.setInsertionPointToStart(targetEntryBlock); ++ auto loadOp = ++ builder.create(clonedValArg.getLoc(), clonedValArg); ++ val.replaceUsesWithIf( ++ loadOp->getResult(0), [targetEntryBlock](mlir::OpOperand &use) { ++ return use.getOwner()->getBlock() == targetEntryBlock; ++ }); ++ } + } -+ 
llvm_unreachable("Unexpected type"); -+ }; ++ valuesDefinedAbove.clear(); ++ mlir::getUsedValuesDefinedAbove(targetRegion, valuesDefinedAbove); ++ } ++} ++} // namespace internal ++} // namespace omp ++} // namespace lower ++} // namespace Fortran ++ ++namespace { ++namespace looputils { ++/// Stores info needed about the induction/iteration variable for each `do ++/// concurrent` in a loop nest. This includes: ++/// * the operation allocating memory for iteration variable, ++/// * the operation(s) updating the iteration variable with the current ++/// iteration number. ++struct InductionVariableInfo { ++ mlir::Operation *iterVarMemDef; ++ llvm::SetVector indVarUpdateOps; ++}; ++ ++using LoopNestToIndVarMap = ++ llvm::MapVector; ++ ++/// Given an operation `op`, this returns true if `op`'s operand is ultimately ++/// the loop's induction variable. Detecting this helps finding the live-in ++/// value corresponding to the induction variable in case the induction variable ++/// is indirectly used in the loop (e.g. throught a cast op). ++bool isIndVarUltimateOperand(mlir::Operation *op, fir::DoLoopOp doLoop) { ++ while (op != nullptr && op->getNumOperands() > 0) { ++ auto ivIt = llvm::find_if(op->getOperands(), [&](mlir::Value operand) { ++ return operand == doLoop.getInductionVar(); ++ }); ++ ++ if (ivIt != op->getOperands().end()) ++ return true; ++ ++ op = op->getOperand(0).getDefiningOp(); ++ } ++ ++ return false; ++} ++ ++/// For the \p doLoop parameter, find the operations that declares its induction ++/// variable or allocates memory for it. ++mlir::Operation *findLoopIndVarMemDecl(fir::DoLoopOp doLoop) { ++ mlir::Value result = nullptr; ++ mlir::visitUsedValuesDefinedAbove( ++ doLoop.getRegion(), [&](mlir::OpOperand *operand) { ++ if (isIndVarUltimateOperand(operand->getOwner(), doLoop)) { ++ assert(result == nullptr && ++ "loop can have only one induction variable"); ++ result = operand->get(); ++ } ++ }); ++ ++ assert(result != nullptr && result.getDefiningOp() != nullptr); ++ return result.getDefiningOp(); ++} ++ ++/// Collect the list of values used inside the loop but defined outside of it. ++void collectLoopLiveIns(fir::DoLoopOp doLoop, ++ llvm::SmallVectorImpl &liveIns) { ++ llvm::SmallDenseSet seenValues; ++ llvm::SmallDenseSet seenOps; ++ ++ mlir::visitUsedValuesDefinedAbove( ++ doLoop.getRegion(), [&](mlir::OpOperand *operand) { ++ if (!seenValues.insert(operand->get()).second) ++ return; ++ ++ mlir::Operation *definingOp = operand->get().getDefiningOp(); ++ // We want to collect ops corresponding to live-ins only once. ++ if (definingOp && !seenOps.insert(definingOp).second) ++ return; ++ ++ liveIns.push_back(operand->get()); ++ }); ++} ++ ++/// Collects the op(s) responsible for updating a loop's iteration variable with ++/// the current iteration number. For example, for the input IR: ++/// ``` ++/// %i = fir.alloca i32 {bindc_name = "i"} ++/// %i_decl:2 = hlfir.declare %i ... ++/// ... ++/// fir.do_loop %i_iv = %lb to %ub step %step unordered { ++/// %1 = fir.convert %i_iv : (index) -> i32 ++/// fir.store %1 to %i_decl#1 : !fir.ref ++/// ... ++/// } ++/// ``` ++/// this function would return the first 2 ops in the `fir.do_loop`'s region. 
++llvm::SetVector
++extractIndVarUpdateOps(fir::DoLoopOp doLoop) {
++  mlir::Value indVar = doLoop.getInductionVar();
++  llvm::SetVector indVarUpdateOps;
++
++  llvm::SmallVector toProcess;
++  toProcess.push_back(indVar);
++
++  llvm::DenseSet done;
++
++  while (!toProcess.empty()) {
++    mlir::Value val = toProcess.back();
++    toProcess.pop_back();
++
++    if (!done.insert(val).second)
++      continue;
++
++    for (mlir::Operation *user : val.getUsers()) {
++      indVarUpdateOps.insert(user);
++
++      for (mlir::Value result : user->getResults())
++        toProcess.push_back(result);
++    }
++  }
++
++  return std::move(indVarUpdateOps);
++}
++
++/// Starting with a value at the end of a definition/conversion chain, walk the
++/// chain backwards and collect all the visited ops along the way. This is the
++/// same as the "backward slice" of the use-def chain of \p link.
++///
++/// If the root of the chain/slice is a constant op (where convert operations
++/// on constants count as constants as well), then populate \p opChain with the
++/// extracted chain/slice. If not, then \p opChain will contain a single value:
++/// \p link.
++///
++/// The purpose of this function is to pull the chain of
++/// constant+conversion ops inside the parallel region if possible, which
++/// prevents creating an unnecessary shared/mapped value that crosses the
++/// OpenMP region.
++///
++/// For example, given this IR:
++/// ```
++/// %c10 = arith.constant 10 : i32
++/// %10 = fir.convert %c10 : (i32) -> index
++/// ```
++/// and given `%10` as the starting input `link`, `opChain` would contain
++/// both of the above ops.
++void collectIndirectConstOpChain(mlir::Operation *link,
++                                 llvm::SetVector &opChain) {
++  mlir::BackwardSliceOptions options;
++  options.inclusive = true;
++  mlir::getBackwardSlice(link, &opChain, options);
++
++  assert(!opChain.empty());
++
++  bool isConstantChain = [&]() {
++    if (!mlir::isa_and_present(opChain.front()))
++      return false;
++
++    return llvm::all_of(llvm::drop_begin(opChain), [](mlir::Operation *op) {
++      return mlir::isa_and_present(op);
++    });
++  }();
++
++  if (isConstantChain)
++    return;
++
++  opChain.clear();
++  opChain.insert(link);
++}
++
++/// Loop \p innerLoop is considered perfectly-nested inside \p outerLoop iff
++/// there are no operations in \p outerLoop's body other than:
++///
++/// 1. the operations needed to assign/update \p outerLoop's induction variable.
++/// 2. \p innerLoop itself.
++///
++/// \return true if \p innerLoop is perfectly nested inside \p outerLoop
++/// according to the above definition.
++bool isPerfectlyNested(fir::DoLoopOp outerLoop, fir::DoLoopOp innerLoop) {
++  mlir::BackwardSliceOptions backwardSliceOptions;
++  backwardSliceOptions.inclusive = true;
++  // We will collect the backward slices for innerLoop's LB, UB, and step.
++  // However, we want to limit the scope of these slices to the scope of
++  // outerLoop's region. 
++ backwardSliceOptions.filter = [&](mlir::Operation *op) { ++ return !mlir::areValuesDefinedAbove(op->getResults(), ++ outerLoop.getRegion()); + }; + -+ // For two given values, establish a common signless IntegerType -+ // that can represent any value of type of x and of type of y, -+ // and return the pair of x, y converted to the new type. -+ auto unifyToSignless = -+ [&](fir::FirOpBuilder &b, mlir::Value x, -+ mlir::Value y) -> std::pair { -+ auto tyX = asIntTy(x.getType()), tyY = asIntTy(y.getType()); -+ unsigned width = std::max(widthOf(tyX), widthOf(tyY)); -+ auto wideTy = mlir::IntegerType::get(b.getContext(), width, -+ mlir::IntegerType::Signless); -+ return std::make_pair(b.createConvert(loc, wideTy, x), -+ b.createConvert(loc, wideTy, y)); ++ mlir::ForwardSliceOptions forwardSliceOptions; ++ forwardSliceOptions.inclusive = true; ++ // We don't care about the outer-loop's induction variable's uses within the ++ // inner-loop, so we filter out these uses. ++ forwardSliceOptions.filter = [&](mlir::Operation *op) { ++ return mlir::areValuesDefinedAbove(op->getResults(), innerLoop.getRegion()); + }; + -+ // Start with signless i32 by default. -+ auto tripCount = builder.createIntegerConstant(loc, builder.getI32Type(), 1); -+ -+ for (auto [origLb, origUb, origStep] : -+ llvm::zip(ops.loopLBVar, ops.loopUBVar, ops.loopStepVar)) { -+ auto tmpS0 = builder.createIntegerConstant(loc, origStep.getType(), 0); -+ auto [step, step0] = unifyToSignless(builder, origStep, tmpS0); -+ auto reverseCond = -+ builder.create(loc, CmpIPredicate::slt, step, step0); -+ auto negStep = builder.create(loc, step0, step); -+ mlir::Value absStep = -+ builder.create(loc, reverseCond, negStep, step); -+ -+ auto [lb, ub] = unifyToSignless(builder, origLb, origUb); -+ auto start = builder.create(loc, reverseCond, ub, lb); -+ auto end = builder.create(loc, reverseCond, lb, ub); -+ -+ mlir::Value range = builder.create(loc, end, start); -+ auto rangeCond = -+ builder.create(loc, CmpIPredicate::slt, end, start); -+ std::tie(range, absStep) = unifyToSignless(builder, range, absStep); -+ // numSteps = (range /u absStep) + 1 -+ auto numSteps = builder.create( -+ loc, builder.create(loc, range, absStep), -+ builder.createIntegerConstant(loc, range.getType(), 1)); -+ -+ auto trip0 = builder.createIntegerConstant(loc, numSteps.getType(), 0); -+ auto loopTripCount = -+ builder.create(loc, rangeCond, trip0, numSteps); -+ auto [totalTC, thisTC] = unifyToSignless(builder, tripCount, loopTripCount); -+ tripCount = builder.create(loc, totalTC, thisTC); ++ llvm::SetVector indVarSlice; ++ mlir::getForwardSlice(outerLoop.getInductionVar(), &indVarSlice, ++ forwardSliceOptions); ++ llvm::DenseSet innerLoopSetupOpsSet(indVarSlice.begin(), ++ indVarSlice.end()); ++ ++ llvm::DenseSet loopBodySet; ++ outerLoop.walk([&](mlir::Operation *op) { ++ if (op == outerLoop) ++ return mlir::WalkResult::advance(); ++ ++ if (op == innerLoop) ++ return mlir::WalkResult::skip(); ++ ++ if (mlir::isa(op)) ++ return mlir::WalkResult::advance(); ++ ++ loopBodySet.insert(op); ++ return mlir::WalkResult::advance(); ++ }); ++ ++ bool result = (loopBodySet == innerLoopSetupOpsSet); ++ mlir::Location loc = outerLoop.getLoc(); ++ LLVM_DEBUG(DBGS() << "Loop pair starting at location " << loc << " is" ++ << (result ? "" : " not") << " perfectly nested\n"); ++ ++ return result; ++} ++ ++/// Starting with `outerLoop` collect a perfectly nested loop nest, if any. 
This
++/// function collects as many loops in the nest as possible; in case it fails
++/// to recognize a certain nested loop as part of the nest, it just returns
++/// the parent loops it discovered before.
++mlir::LogicalResult collectLoopNest(fir::DoLoopOp currentLoop,
++                                    LoopNestToIndVarMap &loopNest) {
++  assert(currentLoop.getUnordered());
++
++  while (true) {
++    loopNest.try_emplace(
++        currentLoop,
++        InductionVariableInfo{
++            findLoopIndVarMemDecl(currentLoop),
++            std::move(looputils::extractIndVarUpdateOps(currentLoop))});
++
++    auto directlyNestedLoops = currentLoop.getRegion().getOps();
++    llvm::SmallVector unorderedLoops;
++
++    for (auto nestedLoop : directlyNestedLoops)
++      if (nestedLoop.getUnordered())
++        unorderedLoops.push_back(nestedLoop);
++
++    if (unorderedLoops.empty())
++      break;
++
++    if (unorderedLoops.size() > 1)
++      return mlir::failure();
++
++    fir::DoLoopOp nestedUnorderedLoop = unorderedLoops.front();
++
++    if (!isPerfectlyNested(currentLoop, nestedUnorderedLoop))
++      return mlir::failure();
++
++    currentLoop = nestedUnorderedLoop;
++  }
++
++  return mlir::success();
++}
++
++/// Prepares the `fir.do_loop` nest to be easily mapped to OpenMP. In
++/// particular, this function would take this input IR:
++/// ```
++/// fir.do_loop %i_iv = %i_lb to %i_ub step %i_step unordered {
++///   fir.store %i_iv to %i#1 : !fir.ref
++///   %j_lb = arith.constant 1 : i32
++///   %j_ub = arith.constant 10 : i32
++///   %j_step = arith.constant 1 : index
++///
++///   fir.do_loop %j_iv = %j_lb to %j_ub step %j_step unordered {
++///     fir.store %j_iv to %j#1 : !fir.ref
++///     ...
++///   }
++/// }
++/// ```
++///
++/// into the following form (using the generic op form since the result is
++/// technically an invalid `fir.do_loop` op):
++///
++/// ```
++/// "fir.do_loop"(%i_lb, %i_ub, %i_step) <{unordered}> ({
++///   ^bb0(%i_iv: index):
++///     %j_lb = "arith.constant"() <{value = 1 : i32}> : () -> i32
++///     %j_ub = "arith.constant"() <{value = 10 : i32}> : () -> i32
++///     %j_step = "arith.constant"() <{value = 1 : index}> : () -> index
++///
++///     "fir.do_loop"(%j_lb, %j_ub, %j_step) <{unordered}> ({
++///       ^bb0(%new_i_iv: index, %new_j_iv: index):
++///         "fir.store"(%new_i_iv, %i#1) : (i32, !fir.ref) -> ()
++///         "fir.store"(%new_j_iv, %j#1) : (i32, !fir.ref) -> ()
++///         ...
++///     })
++/// ```
++///
++/// What happened to the loop nest is the following:
++///
++/// * the innermost loop's entry block was updated from having one operand to
++///   having `n` operands where `n` is the number of loops in the nest,
++///
++/// * the outer loop(s)' ops that update the IVs were sunk into the innermost
++///   loop (see the `"fir.store"(%new_i_iv, %i#1)` op above),
++///
++/// * the innermost loop's entry block's arguments were mapped in order from the
++///   outermost to the innermost IV.
++///
++/// With this IR change, we can directly inline the innermost loop's region into
++/// the newly generated `omp.loop_nest` op.
++///
++/// Note that this function has a pre-condition that \p loopNest consists of
++/// perfectly nested loops; i.e. there are no in-between ops between 2 nested
++/// loops except for the ops to set up the inner loop's LB, UB, and step. These
++/// ops are handled/cloned by `genLoopNestClauseOps(..)`. 
++void sinkLoopIVArgs(mlir::ConversionPatternRewriter &rewriter,
++                    looputils::LoopNestToIndVarMap &loopNest) {
++  if (loopNest.size() <= 1)
++    return;
++
++  fir::DoLoopOp innermostLoop = loopNest.back().first;
++  mlir::Operation &innermostFirstOp = innermostLoop.getRegion().front().front();
++
++  llvm::SmallVector argTypes;
++  llvm::SmallVector argLocs;
++
++  for (auto &[doLoop, indVarInfo] : llvm::drop_end(loopNest)) {
++    // Sink the IV update ops to the innermost loop. We need to do this for all
++    // loops except for the innermost one, hence the `drop_end` usage above.
++    for (mlir::Operation *op : indVarInfo.indVarUpdateOps)
++      op->moveBefore(&innermostFirstOp);
++
++    argTypes.push_back(doLoop.getInductionVar().getType());
++    argLocs.push_back(doLoop.getInductionVar().getLoc());
++  }
++
++  mlir::Region &innermostRegion = innermostLoop.getRegion();
++  // Extend the innermost entry block with arguments to represent the outer IVs.
++  innermostRegion.addArguments(argTypes, argLocs);
++
++  unsigned idx = 1;
++  // In reverse, remap the IVs of the loop nest from the old values to the new
++  // ones. We do that in reverse since the first argument before this loop is
++  // the old IV for the innermost loop. Therefore, we want to replace it first
++  // before the old value (1st argument in the block) is remapped to be the IV
++  // of the outermost loop in the nest.
++  for (auto &[doLoop, _] : llvm::reverse(loopNest)) {
++    doLoop.getInductionVar().replaceAllUsesWith(
++        innermostRegion.getArgument(innermostRegion.getNumArguments() - idx));
++    ++idx;
++  }
++}
++
++/// Collects values that are local to a loop: "loop-local values". A loop-local
++/// value is one that is used exclusively inside the loop but allocated outside
++/// of it. This usually corresponds to temporary values that are used inside the
++/// loop body for initializing other variables, for example.
++///
++/// \param [in] doLoop - the loop within which the function searches for values
++/// used exclusively inside.
++///
++/// \param [out] locals - the list of loop-local values detected for \p doLoop.
++static void collectLoopLocalValues(fir::DoLoopOp doLoop,
++                                   llvm::SetVector &locals) {
++  doLoop.walk([&](mlir::Operation *op) {
++    for (mlir::Value operand : op->getOperands()) {
++      if (locals.contains(operand))
++        continue;
++
++      bool isLocal = true;
++
++      if (!mlir::isa_and_present(operand.getDefiningOp()))
++        continue;
++
++      // Values defined inside the loop are not interesting since they do not
++      // need to be localized.
++      if (doLoop->isAncestor(operand.getDefiningOp()))
++        continue;
++
++      for (auto *user : operand.getUsers()) {
++        if (!doLoop->isAncestor(user)) {
++          isLocal = false;
++          break;
++        }
++      }
++
++      if (isLocal)
++        locals.insert(operand);
++    }
++  });
++}
++
++/// For a "loop-local" value \p local within a loop's scope, localizes that
++/// value within the scope of the parallel region the loop maps to. Towards that
++/// end, this function moves the allocation of \p local within \p allocRegion.
++///
++/// \param local - the value used exclusively within a loop's scope (see
++/// collectLoopLocalValues).
++///
++/// \param allocRegion - the parallel region where \p local's allocation will be
++/// privatized.
++///
++/// \param rewriter - builder used for updating \p allocRegion. 
++static void localizeLoopLocalValue(mlir::Value local, mlir::Region &allocRegion, ++ mlir::ConversionPatternRewriter &rewriter) { ++ rewriter.moveOpBefore(local.getDefiningOp(), &allocRegion.front().front()); ++} ++} // namespace looputils ++ +class DoConcurrentConversion : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + -+ DoConcurrentConversion(mlir::MLIRContext *context, bool mapToDevice) -+ : OpConversionPattern(context), mapToDevice(mapToDevice) {} ++ DoConcurrentConversion(mlir::MLIRContext *context, bool mapToDevice, ++ llvm::DenseSet &concurrentLoopsToSkip) ++ : OpConversionPattern(context), mapToDevice(mapToDevice), ++ concurrentLoopsToSkip(concurrentLoopsToSkip) {} + + mlir::LogicalResult + matchAndRewrite(fir::DoLoopOp doLoop, OpAdaptor adaptor, @@ -6900,116 +7038,99 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/D + "defining operation."); + } + -+ std::function isOpUltimatelyConstant = -+ [&](mlir::Operation *operation) { -+ if (mlir::isa_and_present(operation)) -+ return true; ++ looputils::LoopNestToIndVarMap loopNest; ++ bool hasRemainingNestedLoops = ++ failed(looputils::collectLoopNest(doLoop, loopNest)); ++ if (hasRemainingNestedLoops) ++ mlir::emitWarning(doLoop.getLoc(), ++ "Some `do concurent` loops are not perfectly-nested. " ++ "These will be serialzied."); ++ ++ llvm::SmallVector loopNestLiveIns; ++ looputils::collectLoopLiveIns(loopNest.back().first, loopNestLiveIns); ++ assert(!loopNestLiveIns.empty()); ++ ++ llvm::SetVector locals; ++ looputils::collectLoopLocalValues(loopNest.back().first, locals); ++ // We do not want to map "loop-local" values to the device through ++ // `omp.map.info` ops. Therefore, we remove them from the list of live-ins. ++ loopNestLiveIns.erase(llvm::remove_if(loopNestLiveIns, ++ [&](mlir::Value liveIn) { ++ return locals.contains(liveIn); ++ }), ++ loopNestLiveIns.end()); ++ ++ looputils::sinkLoopIVArgs(rewriter, loopNest); + -+ if (auto convertOp = -+ mlir::dyn_cast_if_present(operation)) -+ return isOpUltimatelyConstant(convertOp.getValue().getDefiningOp()); -+ -+ return false; -+ }; -+ -+ if (!isOpUltimatelyConstant(lbOp) || !isOpUltimatelyConstant(ubOp) || -+ !isOpUltimatelyConstant(stepOp)) { -+ return rewriter.notifyMatchFailure( -+ doLoop, "`do concurrent` conversion is currently only supported for " -+ "constant LB, UB, and step values."); -+ } -+ -+ llvm::SmallVector liveIns; -+ collectLoopLiveIns(doLoop, liveIns); -+ assert(!liveIns.empty()); ++ mlir::omp::TargetOp targetOp; ++ mlir::omp::LoopNestOperands loopNestClauseOps; + + mlir::IRMapping mapper; -+ mlir::omp::TargetOp targetOp; -+ mlir::omp::LoopNestClauseOps loopNestClauseOps; + + if (mapToDevice) { -+ mlir::omp::TargetClauseOps clauseOps; -+ for (mlir::Value liveIn : liveIns) -+ clauseOps.mapVars.push_back(genMapInfoOpForLiveIn(rewriter, liveIn)); -+ targetOp = -+ genTargetOp(doLoop.getLoc(), rewriter, mapper, liveIns, clauseOps); -+ genTeamsOp(doLoop.getLoc(), rewriter, doLoop, liveIns, mapper, -+ loopNestClauseOps); -+ genDistributeOp(doLoop.getLoc(), rewriter); -+ } ++ // TODO: Currently the loop bounds for the outer loop are duplicated. ++ mlir::omp::TargetOperands targetClauseOps; ++ genLoopNestClauseOps(doLoop.getLoc(), rewriter, loopNest, mapper, ++ loopNestClauseOps, &targetClauseOps); ++ ++ // Prevent mapping host-evaluated variables. 
++ loopNestLiveIns.erase( ++ llvm::remove_if(loopNestLiveIns, ++ [&](mlir::Value liveIn) { ++ return llvm::is_contained( ++ targetClauseOps.hostEvalVars, liveIn); ++ }), ++ loopNestLiveIns.end()); ++ ++ // The outermost loop will contain all the live-in values in all nested ++ // loops since live-in values are collected recursively for all nested ++ // ops. ++ for (mlir::Value liveIn : loopNestLiveIns) ++ targetClauseOps.mapVars.push_back( ++ genMapInfoOpForLiveIn(rewriter, liveIn)); + -+ genParallelOp(doLoop.getLoc(), rewriter, doLoop, liveIns, mapper, -+ loopNestClauseOps); -+ genWsLoopOp(rewriter, doLoop, mapper, loopNestClauseOps); ++ targetOp = ++ genTargetOp(doLoop.getLoc(), rewriter, mapper, loopNestLiveIns, ++ targetClauseOps, loopNestClauseOps); + -+ // Now that we created the nested `ws.loop` op, we set can the `target` op's -+ // trip count. -+ if (mapToDevice) { -+ rewriter.setInsertionPoint(targetOp); -+ auto parentModule = doLoop->getParentOfType(); -+ fir::FirOpBuilder firBuilder(rewriter, fir::getKindMapping(parentModule)); -+ -+ mlir::omp::CollapseClauseOps collapseClauseOps; -+ collapseClauseOps.loopLBVar.push_back(lbOp->getResult(0)); -+ collapseClauseOps.loopUBVar.push_back(ubOp->getResult(0)); -+ collapseClauseOps.loopStepVar.push_back(stepOp->getResult(0)); -+ -+ mlir::cast(targetOp).getTripCountMutable().assign( -+ Fortran::lower::omp::internal::calculateTripCount( -+ firBuilder, doLoop.getLoc(), collapseClauseOps)); ++ genTeamsOp(doLoop.getLoc(), rewriter); + } + -+ rewriter.eraseOp(doLoop); -+ return mlir::success(); -+ } -+ -+private: -+ /// Collect the list of values used inside the loop but defined outside of it. -+ /// The first item in the returned list is always the loop's induction -+ /// variable. -+ void collectLoopLiveIns(fir::DoLoopOp doLoop, -+ llvm::SmallVectorImpl &liveIns) const { -+ // Given an operation `op`, this lambda returns true if `op`'s operand is -+ // ultimately the loop's induction variable. Detecting this helps finding -+ // the live-in value corresponding to the induction variable in case the -+ // induction variable is indirectly used in the loop (e.g. throught a cast -+ // op). -+ std::function isIndVarUltimateOperand = -+ [&](mlir::Operation *op) { -+ if (auto storeOp = mlir::dyn_cast_if_present(op)) { -+ return (storeOp.getValue() == doLoop.getInductionVar()) || -+ isIndVarUltimateOperand(storeOp.getValue().getDefiningOp()); -+ } ++ mlir::omp::ParallelOp parallelOp = ++ genParallelOp(doLoop.getLoc(), rewriter, loopNest, mapper); ++ // Only set as composite when part of `distribute parallel do`. 
++ parallelOp.setComposite(mapToDevice); + -+ if (auto convertOp = mlir::dyn_cast_if_present(op)) { -+ return convertOp.getOperand() == doLoop.getInductionVar() || -+ isIndVarUltimateOperand( -+ convertOp.getValue().getDefiningOp()); -+ } ++ if (!mapToDevice) ++ genLoopNestClauseOps(doLoop.getLoc(), rewriter, loopNest, mapper, ++ loopNestClauseOps); + -+ return false; -+ }; ++ for (mlir::Value local : locals) ++ looputils::localizeLoopLocalValue(local, parallelOp.getRegion(), ++ rewriter); + -+ llvm::SmallDenseSet seenValues; -+ llvm::SmallDenseSet seenOps; ++ if (mapToDevice) ++ genDistributeOp(doLoop.getLoc(), rewriter).setComposite(/*val=*/true); + -+ mlir::visitUsedValuesDefinedAbove( -+ doLoop.getRegion(), [&](mlir::OpOperand *operand) { -+ if (!seenValues.insert(operand->get()).second) -+ return; ++ mlir::omp::LoopNestOp ompLoopNest = ++ genWsLoopOp(rewriter, loopNest.back().first, mapper, loopNestClauseOps, ++ /*isComposite=*/mapToDevice); + -+ mlir::Operation *definingOp = operand->get().getDefiningOp(); -+ // We want to collect ops corresponding to live-ins only once. -+ if (definingOp && !seenOps.insert(definingOp).second) -+ return; ++ rewriter.eraseOp(doLoop); + -+ liveIns.push_back(operand->get()); ++ // Mark `unordered` loops that are not perfectly nested to be skipped from ++ // the legality check of the `ConversionTarget` since we are not interested ++ // in mapping them to OpenMP. ++ ompLoopNest->walk([&](fir::DoLoopOp doLoop) { ++ if (doLoop.getUnordered()) { ++ concurrentLoopsToSkip.insert(doLoop); ++ } ++ }); + -+ if (isIndVarUltimateOperand(operand->getOwner())) -+ std::swap(*liveIns.begin(), *liveIns.rbegin()); -+ }); ++ return mlir::success(); + } + ++private: + void genBoundsOps(mlir::ConversionPatternRewriter &rewriter, + mlir::Location loc, hlfir::DeclareOp declareOp, + llvm::SmallVectorImpl &boundsOps) const { @@ -7073,42 +7194,78 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/D + llvm::SmallVector boundsOps; + genBoundsOps(rewriter, liveIn.getLoc(), declareOp, boundsOps); + -+ return Fortran::lower::omp ::internal::createMapInfoOp( -+ rewriter, liveIn.getLoc(), declareOp.getBase(), /*varPtrPtr=*/{}, -+ declareOp.getUniqName().str(), boundsOps, /*members=*/{}, -+ /*membersIndex=*/mlir::DenseIntElementsAttr{}, ++ // Use the raw address to avoid unboxing `fir.box` values whenever possible. ++ // Put differently, if we have access to the direct value memory ++ // reference/address, we use it. 
++ mlir::Value rawAddr = declareOp.getOriginalBase(); ++ return Fortran::lower::omp::internal::createMapInfoOp( ++ rewriter, liveIn.getLoc(), rawAddr, ++ /*varPtrPtr=*/{}, declareOp.getUniqName().str(), boundsOps, ++ /*members=*/{}, ++ /*membersIndex=*/mlir::ArrayAttr{}, + static_cast< + std::underlying_type_t>( + mapFlag), -+ captureKind, liveInType); ++ captureKind, rawAddr.getType()); + } + -+ mlir::omp::TargetOp genTargetOp(mlir::Location loc, -+ mlir::ConversionPatternRewriter &rewriter, -+ mlir::IRMapping &mapper, -+ llvm::ArrayRef liveIns, -+ mlir::omp::TargetClauseOps &clauseOps) const { ++ mlir::omp::TargetOp ++ genTargetOp(mlir::Location loc, mlir::ConversionPatternRewriter &rewriter, ++ mlir::IRMapping &mapper, llvm::ArrayRef mappedVars, ++ mlir::omp::TargetOperands &clauseOps, ++ mlir::omp::LoopNestOperands &loopNestClauseOps) const { + auto targetOp = rewriter.create(loc, clauseOps); ++ auto argIface = llvm::cast(*targetOp); + + mlir::Region ®ion = targetOp.getRegion(); + -+ llvm::SmallVector liveInTypes; -+ llvm::SmallVector liveInLocs; ++ llvm::SmallVector regionArgTypes; ++ llvm::SmallVector regionArgLocs; + -+ for (mlir::Value liveIn : liveIns) { -+ liveInTypes.push_back(liveIn.getType()); -+ liveInLocs.push_back(liveIn.getLoc()); ++ for (auto var : ++ llvm::concat(clauseOps.hostEvalVars, mappedVars)) { ++ regionArgTypes.push_back(var.getType()); ++ regionArgLocs.push_back(var.getLoc()); + } + -+ rewriter.createBlock(®ion, {}, liveInTypes, liveInLocs); ++ rewriter.createBlock(®ion, {}, regionArgTypes, regionArgLocs); + + for (auto [arg, mapInfoOp] : -+ llvm::zip_equal(region.getArguments(), clauseOps.mapVars)) { ++ llvm::zip_equal(argIface.getMapBlockArgs(), clauseOps.mapVars)) { + auto miOp = mlir::cast(mapInfoOp.getDefiningOp()); + hlfir::DeclareOp liveInDeclare = genLiveInDeclare(rewriter, arg, miOp); -+ mapper.map(miOp.getVariableOperand(0), liveInDeclare.getBase()); ++ mlir::Value miOperand = miOp.getVariableOperand(0); ++ ++ // TODO If `miOperand.getDefiningOp()` is a `fir::BoxAddrOp`, we probably ++ // need to "unpack" the box by getting the defining op of it's value. ++ // However, we did not hit this case in reality yet so leaving it as a ++ // todo for now. 
++ ++ mapper.map(miOperand, liveInDeclare.getOriginalBase()); ++ ++ if (auto origDeclareOp = mlir::dyn_cast_if_present( ++ miOperand.getDefiningOp())) ++ mapper.map(origDeclareOp.getBase(), liveInDeclare.getBase()); ++ } ++ ++ for (auto [arg, hostEval] : llvm::zip_equal(argIface.getHostEvalBlockArgs(), ++ clauseOps.hostEvalVars)) ++ mapper.map(hostEval, arg); ++ ++ for (unsigned i = 0; i < loopNestClauseOps.loopLowerBounds.size(); ++i) { ++ loopNestClauseOps.loopLowerBounds[i] = ++ mapper.lookup(loopNestClauseOps.loopLowerBounds[i]); ++ loopNestClauseOps.loopUpperBounds[i] = ++ mapper.lookup(loopNestClauseOps.loopUpperBounds[i]); ++ loopNestClauseOps.loopSteps[i] = ++ mapper.lookup(loopNestClauseOps.loopSteps[i]); + } + ++ fir::FirOpBuilder firBuilder( ++ rewriter, ++ fir::getKindMapping(targetOp->getParentOfType())); ++ Fortran::lower::omp::internal::cloneOrMapRegionOutsiders(firBuilder, ++ targetOp); + rewriter.setInsertionPoint( + rewriter.create(targetOp.getLoc())); + @@ -7158,28 +7315,23 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/D + } + + mlir::omp::TeamsOp -+ genTeamsOp(mlir::Location loc, mlir::ConversionPatternRewriter &rewriter, -+ fir::DoLoopOp doLoop, llvm::ArrayRef liveIns, -+ mlir::IRMapping &mapper, -+ mlir::omp::LoopNestClauseOps &loopNestClauseOps) const { ++ genTeamsOp(mlir::Location loc, ++ mlir::ConversionPatternRewriter &rewriter) const { + auto teamsOp = rewriter.create( -+ loc, /*clauses=*/mlir::omp::TeamsClauseOps{}); ++ loc, /*clauses=*/mlir::omp::TeamsOperands{}); + + rewriter.createBlock(&teamsOp.getRegion()); + rewriter.setInsertionPoint(rewriter.create(loc)); + -+ genInductionVariableAlloc(rewriter, liveIns, mapper); -+ genLoopNestClauseOps(loc, rewriter, doLoop, mapper, loopNestClauseOps); -+ + return teamsOp; + } + -+ void -+ genLoopNestClauseOps(mlir::Location loc, -+ mlir::ConversionPatternRewriter &rewriter, -+ fir::DoLoopOp doLoop, mlir::IRMapping &mapper, -+ mlir::omp::LoopNestClauseOps &loopNestClauseOps) const { -+ assert(loopNestClauseOps.loopLBVar.empty() && ++ void genLoopNestClauseOps( ++ mlir::Location loc, mlir::ConversionPatternRewriter &rewriter, ++ looputils::LoopNestToIndVarMap &loopNest, mlir::IRMapping &mapper, ++ mlir::omp::LoopNestOperands &loopNestClauseOps, ++ mlir::omp::TargetOperands *targetClauseOps = nullptr) const { ++ assert(loopNestClauseOps.loopLowerBounds.empty() && + "Loop nest bounds were already emitted!"); + + // Clones the chain of ops defining a certain loop bound or its step into @@ -7187,53 +7339,60 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/D + // `fir.convert`op, this lambda clones the `fir.convert` as well as the + // value it converts from. We do this since `omp.target` regions are + // isolated from above. 
-+ std::function -+ cloneBoundOrStepDefChain = [&](mlir::Operation *operation) { -+ if (mlir::isa_and_present(operation)) -+ return rewriter.clone(*operation, mapper); -+ -+ if (auto convertOp = -+ mlir::dyn_cast_if_present(operation)) { -+ cloneBoundOrStepDefChain(convertOp.getValue().getDefiningOp()); -+ return rewriter.clone(*operation, mapper); -+ } ++ auto cloneBoundOrStepOpChain = ++ [&](mlir::Operation *operation) -> mlir::Operation * { ++ llvm::SetVector opChain; ++ looputils::collectIndirectConstOpChain(operation, opChain); + -+ std::string opStr; -+ llvm::raw_string_ostream opOs(opStr); -+ opOs << "Unexpected operation: " << *operation; -+ llvm_unreachable(opOs.str().c_str()); -+ }; ++ mlir::Operation *result; ++ for (mlir::Operation *link : opChain) ++ result = rewriter.clone(*link, mapper); + -+ mlir::Operation *lbOp = doLoop.getLowerBound().getDefiningOp(); -+ mlir::Operation *ubOp = doLoop.getUpperBound().getDefiningOp(); -+ mlir::Operation *stepOp = doLoop.getStep().getDefiningOp(); ++ return result; ++ }; ++ ++ auto hostEvalCapture = [&](mlir::Value var, ++ llvm::SmallVectorImpl &bounds) { ++ var = cloneBoundOrStepOpChain(var.getDefiningOp())->getResult(0); ++ bounds.push_back(var); ++ ++ if (targetClauseOps) ++ targetClauseOps->hostEvalVars.push_back(var); ++ }; ++ ++ for (auto &[doLoop, _] : loopNest) { ++ hostEvalCapture(doLoop.getLowerBound(), ++ loopNestClauseOps.loopLowerBounds); ++ hostEvalCapture(doLoop.getUpperBound(), ++ loopNestClauseOps.loopUpperBounds); ++ hostEvalCapture(doLoop.getStep(), loopNestClauseOps.loopSteps); ++ } + -+ loopNestClauseOps.loopLBVar.push_back( -+ cloneBoundOrStepDefChain(lbOp)->getResult(0)); -+ loopNestClauseOps.loopLBVar.push_back( -+ cloneBoundOrStepDefChain(ubOp)->getResult(0)); -+ loopNestClauseOps.loopLBVar.push_back( -+ cloneBoundOrStepDefChain(stepOp)->getResult(0)); -+ loopNestClauseOps.loopInclusiveAttr = rewriter.getUnitAttr(); ++ loopNestClauseOps.loopInclusive = rewriter.getUnitAttr(); + } + + mlir::omp::DistributeOp + genDistributeOp(mlir::Location loc, + mlir::ConversionPatternRewriter &rewriter) const { + auto distOp = rewriter.create( -+ loc, /*clauses=*/mlir::omp::DistributeClauseOps{}); ++ loc, /*clauses=*/mlir::omp::DistributeOperands{}); + + rewriter.createBlock(&distOp.getRegion()); -+ rewriter.setInsertionPoint(rewriter.create(loc)); -+ + return distOp; + } + -+ void genInductionVariableAlloc(mlir::ConversionPatternRewriter &rewriter, -+ llvm::ArrayRef liveIns, -+ mlir::IRMapping &mapper) const { -+ mlir::Operation *indVarMemDef = liveIns.front().getDefiningOp(); ++ void genLoopNestIndVarAllocs(mlir::ConversionPatternRewriter &rewriter, ++ looputils::LoopNestToIndVarMap &loopNest, ++ mlir::IRMapping &mapper) const { ++ ++ for (auto &[_, indVarInfo] : loopNest) ++ genInductionVariableAlloc(rewriter, indVarInfo.iterVarMemDef, mapper); ++ } + ++ mlir::Operation * ++ genInductionVariableAlloc(mlir::ConversionPatternRewriter &rewriter, ++ mlir::Operation *indVarMemDef, ++ mlir::IRMapping &mapper) const { + assert( + indVarMemDef != nullptr && + "Induction variable memdef is expected to have a defining operation."); @@ -7243,38 +7402,34 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/D + indVarDeclareAndAlloc.insert(operand.getDefiningOp()); + indVarDeclareAndAlloc.insert(indVarMemDef); + ++ mlir::Operation *result; + for (mlir::Operation *opToClone : indVarDeclareAndAlloc) -+ rewriter.clone(*opToClone, mapper); ++ result = rewriter.clone(*opToClone, mapper); ++ ++ return result; + } + 
-+ mlir::omp::ParallelOp -+ genParallelOp(mlir::Location loc, mlir::ConversionPatternRewriter &rewriter, -+ fir::DoLoopOp doLoop, llvm::ArrayRef liveIns, -+ mlir::IRMapping &mapper, -+ mlir::omp::LoopNestClauseOps &loopNestClauseOps) const { ++ mlir::omp::ParallelOp genParallelOp(mlir::Location loc, ++ mlir::ConversionPatternRewriter &rewriter, ++ looputils::LoopNestToIndVarMap &loopNest, ++ mlir::IRMapping &mapper) const { + auto parallelOp = rewriter.create(loc); + rewriter.createBlock(¶llelOp.getRegion()); + rewriter.setInsertionPoint(rewriter.create(loc)); + -+ // If mapping to host, the local induction variable and loop bounds need to -+ // be emitted as part of the `omp.parallel` op. -+ if (!mapToDevice) { -+ genInductionVariableAlloc(rewriter, liveIns, mapper); -+ genLoopNestClauseOps(loc, rewriter, doLoop, mapper, loopNestClauseOps); -+ } -+ ++ genLoopNestIndVarAllocs(rewriter, loopNest, mapper); + return parallelOp; + } + + mlir::omp::LoopNestOp + genWsLoopOp(mlir::ConversionPatternRewriter &rewriter, fir::DoLoopOp doLoop, + mlir::IRMapping &mapper, -+ const mlir::omp::LoopNestClauseOps &clauseOps) const { ++ const mlir::omp::LoopNestOperands &clauseOps, ++ bool isComposite) const { + + auto wsloopOp = rewriter.create(doLoop.getLoc()); ++ wsloopOp.setComposite(isComposite); + rewriter.createBlock(&wsloopOp.getRegion()); -+ rewriter.setInsertionPoint( -+ rewriter.create(wsloopOp.getLoc())); + + auto loopNestOp = + rewriter.create(doLoop.getLoc(), clauseOps); @@ -7293,19 +7448,17 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/D + } + + bool mapToDevice; ++ llvm::DenseSet &concurrentLoopsToSkip; +}; + +class DoConcurrentConversionPass -+ : public fir::impl::DoConcurrentConversionPassBase< ++ : public flangomp::impl::DoConcurrentConversionPassBase< + DoConcurrentConversionPass> { +public: -+ using fir::impl::DoConcurrentConversionPassBase< -+ DoConcurrentConversionPass>::DoConcurrentConversionPassBase; -+ + DoConcurrentConversionPass() = default; + + DoConcurrentConversionPass( -+ const fir::DoConcurrentConversionPassOptions &options) ++ const flangomp::DoConcurrentConversionPassOptions &options) + : DoConcurrentConversionPassBase(options) {} + + void runOnOperation() override { @@ -7317,24 +7470,28 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/D + + auto *context = &getContext(); + -+ if (mapTo != fir::omp::DoConcurrentMappingKind::DCMK_Host && -+ mapTo != fir::omp::DoConcurrentMappingKind::DCMK_Device) { ++ if (mapTo != flangomp::DoConcurrentMappingKind::DCMK_Host && ++ mapTo != flangomp::DoConcurrentMappingKind::DCMK_Device) { + mlir::emitWarning(mlir::UnknownLoc::get(context), + "DoConcurrentConversionPass: invalid `map-to` value. 
" + "Valid values are: `host` or `device`"); + return; + } -+ ++ llvm::DenseSet concurrentLoopsToSkip; + mlir::RewritePatternSet patterns(context); + patterns.insert( -+ context, mapTo == fir::omp::DoConcurrentMappingKind::DCMK_Device); ++ context, mapTo == flangomp::DoConcurrentMappingKind::DCMK_Device, ++ concurrentLoopsToSkip); + mlir::ConversionTarget target(*context); -+ target.addLegalDialect(); -+ -+ target.addDynamicallyLegalOp( -+ [](fir::DoLoopOp op) { return !op.getUnordered(); }); ++ target ++ .addLegalDialect(); ++ ++ target.addDynamicallyLegalOp([&](fir::DoLoopOp op) { ++ return !op.getUnordered() || concurrentLoopsToSkip.contains(op); ++ }); + + if (mlir::failed(mlir::applyFullConversion(getOperation(), target, + std::move(patterns)))) { @@ -7347,98 +7504,1225 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/D +} // namespace + +std::unique_ptr -+fir::createDoConcurrentConversionPass(bool mapToDevice) { ++flangomp::createDoConcurrentConversionPass(bool mapToDevice) { + DoConcurrentConversionPassOptions options; -+ options.mapTo = mapToDevice ? fir::omp::DoConcurrentMappingKind::DCMK_Device -+ : fir::omp::DoConcurrentMappingKind::DCMK_Host; ++ options.mapTo = mapToDevice ? flangomp::DoConcurrentMappingKind::DCMK_Device ++ : flangomp::DoConcurrentMappingKind::DCMK_Host; + + return std::make_unique(options); +} -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/lib/Optimizer/Transforms/StackArrays.cpp llvm-project/flang/lib/Optimizer/Transforms/StackArrays.cpp ---- llvm-project.orig/flang/lib/Optimizer/Transforms/StackArrays.cpp 2024-06-12 10:43:12.632210369 -0500 -+++ llvm-project/flang/lib/Optimizer/Transforms/StackArrays.cpp 2024-06-12 10:44:09.351614239 -0500 -@@ -571,8 +571,31 @@ - return {point}; - }; - -- auto oldOmpRegion = -- oldAlloc->getParentOfType(); -+ // Find the first OpenMP outlineable parent region while taking into account -+ // the possibility of finding an omp.parallel region that is taking a loop -+ // wrapper role. These operations must be skipped, as they cannot hold -+ // allocations. -+ const auto findOmpRegion = [](mlir::Operation *op) { -+ auto findOmpRegionImpl = -+ [](mlir::Operation *op, -+ auto &findOmpRegion) -> mlir::omp::OutlineableOpenMPOpInterface { -+ auto ompRegion = -+ op->getParentOfType(); -+ if (!ompRegion) -+ return nullptr; -+ -+ if (auto parallelOp = -+ mlir::dyn_cast_if_present(*ompRegion)) { -+ mlir::Operation *parentOp = parallelOp->getParentOp(); -+ if (mlir::isa_and_present(parentOp)) -+ return findOmpRegion(parentOp, findOmpRegion); +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Optimizer/OpenMP/GlobalFiltering.cpp llvm-project-aso/flang/lib/Optimizer/OpenMP/GlobalFiltering.cpp +--- llvm-project-aso-orig/flang/lib/Optimizer/OpenMP/GlobalFiltering.cpp 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/lib/Optimizer/OpenMP/GlobalFiltering.cpp 2024-11-23 20:39:47.180175366 -0600 +@@ -0,0 +1,70 @@ ++//===- GlobalFiltering.cpp ------------------------------------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file implements transforms to filter out functions intended for the host ++// when compiling for the device and vice versa. 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "flang/Optimizer/Dialect/FIRDialect.h" ++#include "flang/Optimizer/Dialect/FIROpsSupport.h" ++#include "flang/Optimizer/OpenMP/Passes.h" ++ ++#include "mlir/Dialect/Func/IR/FuncOps.h" ++#include "mlir/Dialect/OpenMP/OpenMPDialect.h" ++#include "mlir/Dialect/OpenMP/OpenMPInterfaces.h" ++#include "mlir/IR/BuiltinOps.h" ++#include "llvm/ADT/SmallVector.h" ++ ++namespace flangomp { ++#define GEN_PASS_DEF_GLOBALFILTERINGPASS ++#include "flang/Optimizer/OpenMP/Passes.h.inc" ++} // namespace flangomp ++ ++using namespace mlir; ++ ++namespace { ++// TODO Remove this pass when AOMP moves to `clang-linker-wrapper` (instead of ++// `clang-offload-packager`). ++class GlobalFilteringPass ++ : public flangomp::impl::GlobalFilteringPassBase { ++public: ++ GlobalFilteringPass() = default; ++ ++ void runOnOperation() override { ++ auto op = dyn_cast(getOperation()); ++ if (!op || !op.getIsTargetDevice()) ++ return; ++ ++ op->walk([&](fir::GlobalOp globalOp) { ++ bool symbolUnused = true; ++ SymbolTable::UseRange globalUses = *globalOp.getSymbolUses(op); ++ for (SymbolTable::SymbolUse use : globalUses) { ++ if (use.getUser() == globalOp) ++ continue; ++ symbolUnused = false; ++ break; + } -+ return ompRegion; -+ }; -+ return findOmpRegionImpl(op, findOmpRegionImpl); -+ }; + -+ auto oldOmpRegion = findOmpRegion(oldAlloc); - - // Find when the last operand value becomes available - mlir::Block *operandsBlock = nullptr; -@@ -600,8 +623,7 @@ - LLVM_DEBUG(llvm::dbgs() - << "--Placing after last operand: " << *lastOperand << "\n"); - // check we aren't moving out of an omp region -- auto lastOpOmpRegion = -- lastOperand->getParentOfType(); -+ auto lastOpOmpRegion = findOmpRegion(lastOperand); - if (lastOpOmpRegion == oldOmpRegion) - return checkReturn(lastOperand); - // Presumably this happened because the operands became ready before the -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir llvm-project/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir ---- llvm-project.orig/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir 2024-06-12 10:43:12.656210116 -0500 -+++ llvm-project/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir 2024-06-12 10:44:09.351614239 -0500 -@@ -199,6 +199,7 @@ - fir.store %3 to %6 : !fir.ref - omp.yield ++ // Look for declare target information in case this global is intended to ++ // always exist on the device. ++ auto declareTargetIface = ++ llvm::dyn_cast( ++ globalOp.getOperation()); ++ bool hostOnlySymbol = !declareTargetIface || ++ !declareTargetIface.isDeclareTarget() || ++ declareTargetIface.getDeclareTargetDeviceType() == ++ omp::DeclareTargetDeviceType::host; ++ ++ // Remove unused host symbols with external linkage. 
++ if (symbolUnused && !globalOp.getLinkName() && hostOnlySymbol) ++ globalOp.erase(); ++ }); ++ } ++}; ++} // namespace +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp llvm-project-aso/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp +--- llvm-project-aso-orig/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp 2024-11-23 20:25:26.843275164 -0600 ++++ llvm-project-aso/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp 2024-11-23 20:39:47.180175366 -0600 +@@ -62,7 +62,7 @@ + /// Tracks any intermediate function/subroutine local allocations we + /// generate for the descriptors of box type dummy arguments, so that + /// we can retrieve it for subsequent reuses within the functions +- /// scope ++ /// scope. + std::map + localBoxAllocas; +@@ -370,24 +370,23 @@ + if (!mapClauseOwner) + return; + +- auto addOperands = [&](mlir::MutableOperandRange &mutableOpRange, ++ auto addOperands = [&](mlir::MutableOperandRange &mapVarsArr, + mlir::Operation *directiveOp, + unsigned blockArgInsertIndex = 0) { +- if (!llvm::is_contained(mutableOpRange.getAsOperandRange(), +- op.getResult())) ++ if (!llvm::is_contained(mapVarsArr.getAsOperandRange(), op.getResult())) + return; + + // There doesn't appear to be a simple way to convert MutableOperandRange + // to a vector currently, so we instead use a for_each to populate our + // vector. + llvm::SmallVector newMapOps; +- newMapOps.reserve(mutableOpRange.size()); ++ newMapOps.reserve(mapVarsArr.size()); + llvm::for_each( +- mutableOpRange.getAsOperandRange(), ++ mapVarsArr.getAsOperandRange(), + [&newMapOps](mlir::Value oper) { newMapOps.push_back(oper); }); + + for (auto mapMember : op.getMembers()) { +- if (llvm::is_contained(mutableOpRange.getAsOperandRange(), mapMember)) ++ if (llvm::is_contained(mapVarsArr.getAsOperandRange(), mapMember)) + continue; + newMapOps.push_back(mapMember); + if (directiveOp) { +@@ -397,7 +396,7 @@ + } } -+ omp.terminator + +- mutableOpRange.assign(newMapOps); ++ mapVarsArr.assign(newMapOps); + }; + + auto argIface = +@@ -405,14 +404,13 @@ + + if (auto mapClauseOwner = + llvm::dyn_cast(target)) { +- mlir::MutableOperandRange mapMutableOpRange = +- mapClauseOwner.getMapVarsMutable(); ++ mlir::MutableOperandRange mapVarsArr = mapClauseOwner.getMapVarsMutable(); + unsigned blockArgInsertIndex = + argIface + ? argIface.getMapBlockArgsStart() + argIface.numMapBlockArgs() + : 0; + addOperands( +- mapMutableOpRange, ++ mapVarsArr, + llvm::dyn_cast_or_null(argIface.getOperation()), + blockArgInsertIndex); } - omp.terminator +@@ -466,10 +464,7 @@ + // operation (usually function) containing the MapInfoOp because this pass + // will mutate siblings of MapInfoOp. + void runOnOperation() override { +- mlir::ModuleOp module = +- mlir::dyn_cast_or_null(getOperation()); +- if (!module) +- module = getOperation()->getParentOfType(); ++ mlir::ModuleOp module = mlir::cast(getOperation()); + fir::KindMapping kindMap = fir::getKindMapping(module); + fir::FirOpBuilder builder{module, std::move(kindMap)}; + +@@ -481,7 +476,7 @@ + // ourselves to the possibility of race conditions while this pass + // undergoes frequent re-iteration for the near future. So we loop + // over function in the module and then map.info inside of those. +- getOperation()->walk([&](mlir::func::FuncOp func) { ++ module->walk([&](mlir::func::FuncOp func) { + // clear all local allocations we made for any boxes in any prior + // iterations from previous function scopes. 
+ localBoxAllocas.clear(); +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/lib/Optimizer/Passes/Pipelines.cpp llvm-project-aso/flang/lib/Optimizer/Passes/Pipelines.cpp +--- llvm-project-aso-orig/flang/lib/Optimizer/Passes/Pipelines.cpp 2024-11-23 20:25:26.843275164 -0600 ++++ llvm-project-aso/flang/lib/Optimizer/Passes/Pipelines.cpp 2024-11-23 20:39:47.180175366 -0600 +@@ -243,12 +243,21 @@ + /// \param pm - MLIR pass manager that will hold the pipeline definition. + /// \param isTargetDevice - Whether code is being generated for a target device + /// rather than the host device. +-void createOpenMPFIRPassPipeline(mlir::PassManager &pm, bool isTargetDevice) { ++void createOpenMPFIRPassPipeline(mlir::PassManager &pm, ++ OpenMPFIRPassPipelineOpts opts) { ++ if (opts.doConcurrentMappingKind != DoConcurrentMappingKind::DCMK_None) ++ pm.addPass(flangomp::createDoConcurrentConversionPass( ++ opts.doConcurrentMappingKind == DoConcurrentMappingKind::DCMK_Device)); ++ + pm.addPass(flangomp::createMapInfoFinalizationPass()); + pm.addPass(flangomp::createMapsForPrivatizedSymbolsPass()); + pm.addPass(flangomp::createMarkDeclareTargetPass()); +- if (isTargetDevice) ++ if (opts.isTargetDevice) { + pm.addPass(flangomp::createFunctionFilteringPass()); ++ ++ if (opts.enableOffloadGlobalFiltering) ++ pm.addPass(flangomp::createGlobalFilteringPass()); ++ } + } + + void createDebugPasses(mlir::PassManager &pm, +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/array-constructor.cpp llvm-project-aso/flang/runtime/array-constructor.cpp +--- llvm-project-aso-orig/flang/runtime/array-constructor.cpp 2024-08-27 20:36:25.236173040 -0500 ++++ llvm-project-aso/flang/runtime/array-constructor.cpp 2024-11-23 20:39:47.180175366 -0600 +@@ -176,7 +176,7 @@ + AllocateOrReallocateVectorIfNeeded(vector, terminator, to.Elements(), 1); + SubscriptValue subscript[1]{ + to.GetDimension(0).LowerBound() + vector.nextValuePosition}; +- std::memcpy(to.Element(subscript), from, to.ElementBytes()); ++ Fortran::runtime::memcpy(to.Element(subscript), from, to.ElementBytes()); + ++vector.nextValuePosition; + } + +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/assign.cpp llvm-project-aso/flang/runtime/assign.cpp +--- llvm-project-aso-orig/flang/runtime/assign.cpp 2024-11-23 20:25:26.851275134 -0600 ++++ llvm-project-aso/flang/runtime/assign.cpp 2024-11-23 20:39:47.180175366 -0600 +@@ -263,7 +263,7 @@ + if (MayAlias(to, from)) { + if (mustDeallocateLHS) { + deferDeallocation = &deferredDeallocStatDesc.descriptor(); +- std::memcpy(deferDeallocation, &to, to.SizeInBytes()); ++ Fortran::runtime::memcpy(deferDeallocation, &to, to.SizeInBytes()); + to.set_base_addr(nullptr); + } else if (!isSimpleMemmove()) { + // Handle LHS/RHS aliasing by copying RHS into a temp, then +@@ -271,7 +271,7 @@ + auto descBytes{from.SizeInBytes()}; + StaticDescriptor staticDesc; + Descriptor &newFrom{staticDesc.descriptor()}; +- std::memcpy(&newFrom, &from, descBytes); ++ Fortran::runtime::memcpy(&newFrom, &from, descBytes); + // Pretend the temporary descriptor is for an ALLOCATABLE + // entity, otherwise, the Deallocate() below will not + // free the descriptor memory. 
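// --- Illustrative sketch, not part of the patch -----------------------------
// Shows how a driver might populate the OpenMPFIRPassPipelineOpts introduced in
// the Pipelines.cpp hunk above and run the resulting pipeline. Only the option
// fields (isTargetDevice, enableOffloadGlobalFiltering, doConcurrentMappingKind),
// the DCMK_* enumerators, and createOpenMPFIRPassPipeline() are taken from the
// diff; the header path, namespace spelling, and buildOffloadPipeline() itself
// are assumptions made for illustration only.
#include "flang/Optimizer/Passes/Pipelines.h" // assumed location of the options
#include "mlir/IR/BuiltinOps.h"
#include "mlir/Pass/PassManager.h"

static mlir::LogicalResult
buildOffloadPipeline(mlir::ModuleOp module, bool isTargetDevice,
                     bool enableGlobalFiltering, bool mapDoConcurrentToDevice) {
  mlir::PassManager pm(module->getContext());

  OpenMPFIRPassPipelineOpts opts{};
  // Device compilation gates FunctionFiltering and the new GlobalFiltering pass.
  opts.isTargetDevice = isTargetDevice;
  // Corresponds to the driver's offload-global-filtering toggle.
  opts.enableOffloadGlobalFiltering = enableGlobalFiltering;
  // DCMK_None leaves `do concurrent` loops alone; otherwise the
  // DoConcurrentConversion pass maps them to host or device OpenMP.
  opts.doConcurrentMappingKind = mapDoConcurrentToDevice
                                     ? DoConcurrentMappingKind::DCMK_Device
                                     : DoConcurrentMappingKind::DCMK_Host;

  // Schedules DoConcurrentConversion (when enabled), MapInfoFinalization,
  // MapsForPrivatizedSymbols, MarkDeclareTarget and, for device compilation,
  // FunctionFiltering plus GlobalFiltering, exactly as wired up above.
  createOpenMPFIRPassPipeline(pm, opts);
  return pm.run(module);
}
// -----------------------------------------------------------------------------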
+diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/buffer.h llvm-project-aso/flang/runtime/buffer.h +--- llvm-project-aso-orig/flang/runtime/buffer.h 2024-08-27 20:36:25.240173000 -0500 ++++ llvm-project-aso/flang/runtime/buffer.h 2024-11-23 20:39:47.180175366 -0600 +@@ -158,8 +158,8 @@ + // Avoid passing a null pointer, since it would result in an undefined + // behavior. + if (old != nullptr) { +- std::memcpy(buffer_, old + start_, chunk); +- std::memcpy(buffer_ + chunk, old, length_ - chunk); ++ Fortran::runtime::memcpy(buffer_, old + start_, chunk); ++ Fortran::runtime::memcpy(buffer_ + chunk, old, length_ - chunk); + FreeMemory(old); + } + start_ = 0; +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/character.cpp llvm-project-aso/flang/runtime/character.cpp +--- llvm-project-aso-orig/flang/runtime/character.cpp 2024-08-27 20:36:25.240173000 -0500 ++++ llvm-project-aso/flang/runtime/character.cpp 2024-11-23 20:39:47.180175366 -0600 +@@ -596,8 +596,8 @@ + from.GetLowerBounds(fromAt); + for (; elements-- > 0; + to += newBytes, p += oldBytes, from.IncrementSubscripts(fromAt)) { +- std::memcpy(to, p, oldBytes); +- std::memcpy(to + oldBytes, from.Element(fromAt), fromBytes); ++ Fortran::runtime::memcpy(to, p, oldBytes); ++ Fortran::runtime::memcpy(to + oldBytes, from.Element(fromAt), fromBytes); } -@@ -223,6 +224,7 @@ - // CHECK: llvm.store %[[I1]], %[[ARR_I_REF]] : i32, !llvm.ptr - // CHECK: omp.yield - // CHECK: } -+// CHECK: omp.terminator - // CHECK: } - // CHECK: omp.terminator - // CHECK: } -@@ -516,6 +518,7 @@ - fir.store %7 to %3 : !fir.ref - omp.yield + FreeMemory(old); + } +@@ -611,7 +611,7 @@ + std::size_t oldLen{accumulator.ElementBytes()}; + accumulator.raw().elem_len += chars; + RUNTIME_CHECK(terminator, accumulator.Allocate() == CFI_SUCCESS); +- std::memcpy(accumulator.OffsetElement(oldLen), from, chars); ++ Fortran::runtime::memcpy(accumulator.OffsetElement(oldLen), from, chars); + FreeMemory(old); + } + +@@ -677,7 +677,7 @@ + std::size_t RTDEF(CharacterAppend1)(char *lhs, std::size_t lhsBytes, + std::size_t offset, const char *rhs, std::size_t rhsBytes) { + if (auto n{std::min(lhsBytes - offset, rhsBytes)}) { +- std::memcpy(lhs + offset, rhs, n); ++ Fortran::runtime::memcpy(lhs + offset, rhs, n); + offset += n; + } + return offset; +@@ -685,7 +685,7 @@ + + void RTDEF(CharacterPad1)(char *lhs, std::size_t bytes, std::size_t offset) { + if (bytes > offset) { +- std::memset(lhs + offset, ' ', bytes - offset); ++ Fortran::runtime::memset(lhs + offset, ' ', bytes - offset); + } + } + +@@ -817,7 +817,7 @@ + } + const char *from{string.OffsetElement()}; + for (char *to{result.OffsetElement()}; ncopies-- > 0; to += origBytes) { +- std::memcpy(to, from, origBytes); ++ Fortran::runtime::memcpy(to, from, origBytes); + } + } + +@@ -847,7 +847,7 @@ + result.Establish(string.type(), resultBytes, nullptr, 0, nullptr, + CFI_attribute_allocatable); + RUNTIME_CHECK(terminator, result.Allocate() == CFI_SUCCESS); +- std::memcpy(result.OffsetElement(), string.OffsetElement(), resultBytes); ++ Fortran::runtime::memcpy(result.OffsetElement(), string.OffsetElement(), resultBytes); + } + + std::size_t RTDEF(Verify1)(const char *x, std::size_t xLen, const char *set, +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/command.cpp llvm-project-aso/flang/runtime/command.cpp +--- llvm-project-aso-orig/flang/runtime/command.cpp 2024-08-27 20:36:25.240173000 -0500 ++++ llvm-project-aso/flang/runtime/command.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -54,7 
+54,7 @@ + + static void FillWithSpaces(const Descriptor &value, std::size_t offset = 0) { + if (offset < value.ElementBytes()) { +- std::memset( ++ Fortran::runtime::memset( + value.OffsetElement(offset), ' ', value.ElementBytes() - offset); + } + } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/copy.cpp llvm-project-aso/flang/runtime/copy.cpp +--- llvm-project-aso-orig/flang/runtime/copy.cpp 2024-08-27 20:36:25.240173000 -0500 ++++ llvm-project-aso/flang/runtime/copy.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -9,6 +9,7 @@ + #include "copy.h" + #include "stack.h" + #include "terminator.h" ++#include "tools.h" + #include "type-info.h" + #include "flang/Runtime/allocatable.h" + #include "flang/Runtime/descriptor.h" +@@ -101,7 +102,7 @@ + char *toPtr{to.Element(toAt)}; + char *fromPtr{from.Element(fromAt)}; + RUNTIME_CHECK(terminator, to.ElementBytes() == from.ElementBytes()); +- std::memcpy(toPtr, fromPtr, to.ElementBytes()); ++ Fortran::runtime::memcpy(toPtr, fromPtr, to.ElementBytes()); + return; + } + +@@ -148,7 +149,7 @@ + // Moreover, if we came here from an Component::Genre::Data component, + // all the per-element copies are redundant, because the parent + // has already been copied as a whole. +- std::memcpy(toPtr, fromPtr, curTo.ElementBytes()); ++ Fortran::runtime::memcpy(toPtr, fromPtr, curTo.ElementBytes()); + --elements; + if (elements != 0) { + currentCopy.IncrementSubscripts(terminator); +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/derived.cpp llvm-project-aso/flang/runtime/derived.cpp +--- llvm-project-aso-orig/flang/runtime/derived.cpp 2024-08-27 20:36:25.240173000 -0500 ++++ llvm-project-aso/flang/runtime/derived.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -73,7 +73,7 @@ + std::size_t bytes{comp.SizeInBytes(instance)}; + for (std::size_t j{0}; j++ < elements; instance.IncrementSubscripts(at)) { + char *ptr{instance.ElementComponent(at, comp.offset())}; +- std::memcpy(ptr, init, bytes); ++ Fortran::runtime::memcpy(ptr, init, bytes); + } + } else if (comp.genre() == typeInfo::Component::Genre::Pointer) { + // Data pointers without explicit initialization are established +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/descriptor.cpp llvm-project-aso/flang/runtime/descriptor.cpp +--- llvm-project-aso-orig/flang/runtime/descriptor.cpp 2024-08-27 20:36:25.240173000 -0500 ++++ llvm-project-aso/flang/runtime/descriptor.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -26,7 +26,7 @@ + RT_API_ATTRS Descriptor::Descriptor(const Descriptor &that) { *this = that; } + + RT_API_ATTRS Descriptor &Descriptor::operator=(const Descriptor &that) { +- std::memcpy(this, &that, that.SizeInBytes()); ++ Fortran::runtime::memcpy(this, &that, that.SizeInBytes()); + return *this; + } + +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/descriptor-io.cpp llvm-project-aso/flang/runtime/descriptor-io.cpp +--- llvm-project-aso-orig/flang/runtime/descriptor-io.cpp 2024-08-27 20:36:25.240173000 -0500 ++++ llvm-project-aso/flang/runtime/descriptor-io.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -32,7 +32,7 @@ + if (edit.descriptor == DataEdit::DefinedDerivedType) { + ioType[0] = 'D'; + ioType[1] = 'T'; +- std::memcpy(ioType + 2, edit.ioType, edit.ioTypeChars); ++ Fortran::runtime::memcpy(ioType + 2, edit.ioType, edit.ioTypeChars); + } else { + runtime::strcpy( + ioType, io.mutableModes().inNamelist ? 
"NAMELIST" : "LISTDIRECTED"); +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/edit-input.cpp llvm-project-aso/flang/runtime/edit-input.cpp +--- llvm-project-aso-orig/flang/runtime/edit-input.cpp 2024-09-13 09:46:38.870303386 -0500 ++++ llvm-project-aso/flang/runtime/edit-input.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -121,7 +121,7 @@ + io.HandleAbsolutePosition(start); + remaining.reset(); + // Make a second pass now that the digit count is known +- std::memset(n, 0, bytes); ++ Fortran::runtime::memset(n, 0, bytes); + int increment{isHostLittleEndian ? -1 : 1}; + auto *data{reinterpret_cast(n) + + (isHostLittleEndian ? significantBytes - 1 : bytes - significantBytes)}; +@@ -280,9 +280,9 @@ + // For kind==8 (i.e. shft==0), the value is stored in low_ in big endian. + if (!isHostLittleEndian && shft >= 0) { + auto l{value.low() << (8 * shft)}; +- std::memcpy(n, &l, kind); ++ Fortran::runtime::memcpy(n, &l, kind); + } else { +- std::memcpy(n, &value, kind); // a blank field means zero ++ Fortran::runtime::memcpy(n, &value, kind); // a blank field means zero } -+ omp.terminator + return true; + } else { +@@ -1095,7 +1095,7 @@ + --skipChars; + } else { + char32_t buffer{0}; +- std::memcpy(&buffer, input, chunkBytes); ++ Fortran::runtime::memcpy(&buffer, input, chunkBytes); + if ((sizeof *x == 1 && buffer > 0xff) || + (sizeof *x == 2 && buffer > 0xffff)) { + *x++ = '?'; +@@ -1122,7 +1122,7 @@ + chunkBytes = std::min(remainingChars, readyBytes); + chunkBytes = std::min(lengthChars, chunkBytes); + chunkChars = chunkBytes; +- std::memcpy(x, input, chunkBytes); ++ Fortran::runtime::memcpy(x, input, chunkBytes); + x += chunkBytes; + lengthChars -= chunkChars; + } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/extensions.cpp llvm-project-aso/flang/runtime/extensions.cpp +--- llvm-project-aso-orig/flang/runtime/extensions.cpp 2024-10-18 17:40:32.520992126 -0500 ++++ llvm-project-aso/flang/runtime/extensions.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -78,7 +78,7 @@ + + void GetUsernameEnvVar(const char *envName, char *arg, std::int64_t length) { + Descriptor name{*Descriptor::Create( +- 1, std::strlen(envName) + 1, const_cast(envName), 0)}; ++ 1, Fortran::runtime::strlen(envName) + 1, const_cast(envName), 0)}; + Descriptor value{*Descriptor::Create(1, length, arg, 0)}; + + RTNAME(GetEnvVariable) +@@ -102,7 +102,7 @@ + char str[26]; + // Insufficient space, fill with spaces and return. 
+ if (length < 24) { +- std::memset(arg, ' ', length); ++ Fortran::runtime::memset(arg, ' ', length); + return; + } + +@@ -134,8 +134,8 @@ + void FORTRAN_PROCEDURE_NAME(getlog)(char *arg, std::int64_t length) { + #if _REENTRANT || _POSIX_C_SOURCE >= 199506L + if (length >= 1 && getlogin_r(arg, length) == 0) { +- auto loginLen{std::strlen(arg)}; +- std::memset( ++ auto loginLen{Fortran::runtime::strlen(arg)}; ++ Fortran::runtime::memset( + arg + loginLen, ' ', static_cast(length) - loginLen); + return; + } +@@ -189,7 +189,7 @@ + char *newName{nullptr}; + if (name[nameLength - 1] != '\0') { + newName = static_cast(std::malloc(nameLength + 1)); +- std::memcpy(newName, name, nameLength); ++ Fortran::runtime::memcpy(newName, name, nameLength); + newName[nameLength] = '\0'; + name = newName; + } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/external-unit.cpp llvm-project-aso/flang/runtime/external-unit.cpp +--- llvm-project-aso-orig/flang/runtime/external-unit.cpp 2024-08-27 20:36:25.240173000 -0500 ++++ llvm-project-aso/flang/runtime/external-unit.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -122,7 +122,7 @@ + bool impliedClose{false}; + if (IsConnected()) { + bool isSamePath{newPath.get() && path() && pathLength() == newPathLength && +- std::memcmp(path(), newPath.get(), newPathLength) == 0}; ++ Fortran::runtime::memcmp(path(), newPath.get(), newPathLength) == 0}; + if (status && *status != OpenStatus::Old && isSamePath) { + handler.SignalError("OPEN statement for connected unit may not have " + "explicit STATUS= other than 'OLD'"); +@@ -202,7 +202,7 @@ + std::size_t pathMaxLen{32}; + auto path{SizedNew{handler}(pathMaxLen)}; + std::snprintf(path.get(), pathMaxLen, "fort.%d", unitNumber_); +- OpenUnit(status, action, position, std::move(path), std::strlen(path.get()), ++ OpenUnit(status, action, position, std::move(path), Fortran::runtime::strlen(path.get()), + convert, handler); + return IsConnected(); + } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/extrema.cpp llvm-project-aso/flang/runtime/extrema.cpp +--- llvm-project-aso-orig/flang/runtime/extrema.cpp 2024-09-24 18:07:09.519920643 -0500 ++++ llvm-project-aso/flang/runtime/extrema.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -374,7 +374,7 @@ + CreatePartialReductionResult(result, x, + Descriptor::BytesFor(TypeCategory::Integer, kind), dim, terminator, + intrinsic, TypeCode{TypeCategory::Integer, kind}); +- std::memset( ++ Fortran::runtime::memset( + result.OffsetElement(), 0, result.Elements() * result.ElementBytes()); + return; + } +@@ -518,11 +518,11 @@ + static_assert(std::is_same_v); + std::size_t byteSize{array_.ElementBytes()}; + if (extremum_) { +- std::memcpy(p, extremum_, byteSize); ++ Fortran::runtime::memcpy(p, extremum_, byteSize); + } else { + // Empty array; fill with character 0 for MAXVAL. + // For MINVAL, set all of the bits. +- std::memset(p, IS_MAXVAL ? 0 : 255, byteSize); ++ Fortran::runtime::memset(p, IS_MAXVAL ? 
0 : 255, byteSize); + } + } + RT_API_ATTRS bool Accumulate(const Type *x) { +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/format-implementation.h llvm-project-aso/flang/runtime/format-implementation.h +--- llvm-project-aso-orig/flang/runtime/format-implementation.h 2024-09-13 09:46:38.870303386 -0500 ++++ llvm-project-aso/flang/runtime/format-implementation.h 2024-11-23 20:39:47.184175353 -0600 +@@ -49,7 +49,7 @@ + SubscriptValue at[maxRank]; + formatDescriptor->GetLowerBounds(at); + for (std::size_t j{0}; j < elements; ++j) { +- std::memcpy(p, formatDescriptor->Element(at), elementBytes); ++ Fortran::runtime::memcpy(p, formatDescriptor->Element(at), elementBytes); + p += elementBytes; + formatDescriptor->IncrementSubscripts(at); + } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/internal-unit.cpp llvm-project-aso/flang/runtime/internal-unit.cpp +--- llvm-project-aso-orig/flang/runtime/internal-unit.cpp 2024-08-27 20:36:25.240173000 -0500 ++++ llvm-project-aso/flang/runtime/internal-unit.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -70,7 +70,7 @@ + BlankFill(record + furthestPositionInRecord, + positionInRecord - furthestPositionInRecord); + } +- std::memcpy(record + positionInRecord, data, bytes); ++ Fortran::runtime::memcpy(record + positionInRecord, data, bytes); + positionInRecord += bytes; + furthestPositionInRecord = furthestAfter; + return ok; +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/io-error.cpp llvm-project-aso/flang/runtime/io-error.cpp +--- llvm-project-aso-orig/flang/runtime/io-error.cpp 2024-08-27 20:36:25.240173000 -0500 ++++ llvm-project-aso/flang/runtime/io-error.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -151,7 +151,7 @@ + } else if (ok) { + std::size_t copied{Fortran::runtime::strlen(buffer)}; + if (copied < bufferLength) { +- std::memset(buffer + copied, ' ', bufferLength - copied); ++ Fortran::runtime::memset(buffer + copied, ' ', bufferLength - copied); + } + return true; + } else { +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/matmul.cpp llvm-project-aso/flang/runtime/matmul.cpp +--- llvm-project-aso-orig/flang/runtime/matmul.cpp 2024-09-24 18:07:09.519920643 -0500 ++++ llvm-project-aso/flang/runtime/matmul.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -81,7 +81,7 @@ + SubscriptValue n, std::size_t xColumnByteStride = 0, + std::size_t yColumnByteStride = 0) { + using ResultType = CppTypeFor; +- std::memset(product, 0, rows * cols * sizeof *product); ++ Fortran::runtime::memset(product, 0, rows * cols * sizeof *product); + const XT *RESTRICT xp0{x}; + for (SubscriptValue k{0}; k < n; ++k) { + ResultType *RESTRICT p{product}; +@@ -153,7 +153,7 @@ + SubscriptValue n, const XT *RESTRICT x, const YT *RESTRICT y, + std::size_t xColumnByteStride = 0) { + using ResultType = CppTypeFor; +- std::memset(product, 0, rows * sizeof *product); ++ Fortran::runtime::memset(product, 0, rows * sizeof *product); + [[maybe_unused]] const XT *RESTRICT xp0{x}; + for (SubscriptValue k{0}; k < n; ++k) { + ResultType *RESTRICT p{product}; +@@ -203,7 +203,7 @@ + SubscriptValue cols, const XT *RESTRICT x, const YT *RESTRICT y, + std::size_t yColumnByteStride = 0) { + using ResultType = CppTypeFor; +- std::memset(product, 0, cols * sizeof *product); ++ Fortran::runtime::memset(product, 0, cols * sizeof *product); + for (SubscriptValue k{0}; k < n; ++k) { + ResultType *RESTRICT p{product}; + auto xv{static_cast(*x++)}; +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/matmul-transpose.cpp 
llvm-project-aso/flang/runtime/matmul-transpose.cpp +--- llvm-project-aso-orig/flang/runtime/matmul-transpose.cpp 2024-09-24 18:07:09.519920643 -0500 ++++ llvm-project-aso/flang/runtime/matmul-transpose.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -62,7 +62,7 @@ + std::size_t yColumnByteStride = 0) { + using ResultType = CppTypeFor; + +- std::memset(product, 0, rows * cols * sizeof *product); ++ Fortran::runtime::memset(product, 0, rows * cols * sizeof *product); + for (SubscriptValue j{0}; j < cols; ++j) { + for (SubscriptValue i{0}; i < rows; ++i) { + for (SubscriptValue k{0}; k < n; ++k) { +@@ -132,7 +132,7 @@ + SubscriptValue n, const XT *RESTRICT x, const YT *RESTRICT y, + std::size_t xColumnByteStride = 0) { + using ResultType = CppTypeFor; +- std::memset(product, 0, rows * sizeof *product); ++ Fortran::runtime::memset(product, 0, rows * sizeof *product); + for (SubscriptValue i{0}; i < rows; ++i) { + for (SubscriptValue k{0}; k < n; ++k) { + ResultType x_ki; +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/misc-intrinsic.cpp llvm-project-aso/flang/runtime/misc-intrinsic.cpp +--- llvm-project-aso-orig/flang/runtime/misc-intrinsic.cpp 2024-08-27 20:36:25.240173000 -0500 ++++ llvm-project-aso/flang/runtime/misc-intrinsic.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -42,14 +42,14 @@ + source.GetLowerBounds(sourceAt); + while (resultBytes > 0 && sourceElements > 0) { + std::size_t toMove{std::min(resultBytes, sourceElementBytes)}; +- std::memcpy(to, source.Element(sourceAt), toMove); ++ Fortran::runtime::memcpy(to, source.Element(sourceAt), toMove); + to += toMove; + resultBytes -= toMove; + --sourceElements; + source.IncrementSubscripts(sourceAt); + } + if (resultBytes > 0) { +- std::memset(to, 0, resultBytes); ++ Fortran::runtime::memset(to, 0, resultBytes); + } + } + +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/pseudo-unit.cpp llvm-project-aso/flang/runtime/pseudo-unit.cpp +--- llvm-project-aso-orig/flang/runtime/pseudo-unit.cpp 2024-08-27 20:36:25.244172960 -0500 ++++ llvm-project-aso/flang/runtime/pseudo-unit.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -132,7 +132,7 @@ + // TODO: use persistent string buffer that can be reallocated + // as needed, and only freed at destruction of *this. 
+ auto string{SizedNew{handler}(bytes + 1)}; +- std::memcpy(string.get(), buffer, bytes); ++ Fortran::runtime::memcpy(string.get(), buffer, bytes); + string.get()[bytes] = '\0'; + std::printf("%s", string.get()); + return bytes; +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/ragged.cpp llvm-project-aso/flang/runtime/ragged.cpp +--- llvm-project-aso-orig/flang/runtime/ragged.cpp 2024-08-27 20:36:25.244172960 -0500 ++++ llvm-project-aso/flang/runtime/ragged.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -40,7 +40,7 @@ + std::size_t bytes{static_cast(elementSize * size)}; + header->bufferPointer = AllocateMemoryOrCrash(terminator, bytes); + if (header->bufferPointer) { +- std::memset(header->bufferPointer, 0, bytes); ++ Fortran::runtime::memset(header->bufferPointer, 0, bytes); + } + return header; + } else { +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/reduce.cpp llvm-project-aso/flang/runtime/reduce.cpp +--- llvm-project-aso-orig/flang/runtime/reduce.cpp 2024-09-24 18:07:09.519920643 -0500 ++++ llvm-project-aso/flang/runtime/reduce.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -79,16 +79,16 @@ + activeTemp_ = 1 - activeTemp_; + } else { + activeTemp_ = 0; +- std::memcpy(&*temp_[activeTemp_], operand, elementBytes_); ++ Fortran::runtime::memcpy(&*temp_[activeTemp_], operand, elementBytes_); + } + return true; + } + template + RT_API_ATTRS void GetResult(A *to, int /*zeroBasedDim*/ = -1) { + if (activeTemp_ >= 0) { +- std::memcpy(to, &*temp_[activeTemp_], elementBytes_); ++ Fortran::runtime::memcpy(to, &*temp_[activeTemp_], elementBytes_); + } else if (identity_) { +- std::memcpy(to, identity_, elementBytes_); ++ Fortran::runtime::memcpy(to, identity_, elementBytes_); + } else { + terminator_.Crash("REDUCE() without IDENTITY= has no result"); + } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/stat.cpp llvm-project-aso/flang/runtime/stat.cpp +--- llvm-project-aso-orig/flang/runtime/stat.cpp 2024-08-27 20:36:25.244172960 -0500 ++++ llvm-project-aso/flang/runtime/stat.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -84,10 +84,10 @@ + std::size_t bufferLength{errmsg->ElementBytes()}; + std::size_t msgLength{Fortran::runtime::strlen(msg)}; + if (msgLength >= bufferLength) { +- std::memcpy(buffer, msg, bufferLength); ++ Fortran::runtime::memcpy(buffer, msg, bufferLength); + } else { +- std::memcpy(buffer, msg, msgLength); +- std::memset(buffer + msgLength, ' ', bufferLength - msgLength); ++ Fortran::runtime::memcpy(buffer, msg, msgLength); ++ Fortran::runtime::memset(buffer + msgLength, ' ', bufferLength - msgLength); + } + } + } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/time-intrinsic.cpp llvm-project-aso/flang/runtime/time-intrinsic.cpp +--- llvm-project-aso-orig/flang/runtime/time-intrinsic.cpp 2024-09-13 09:46:38.870303386 -0500 ++++ llvm-project-aso/flang/runtime/time-intrinsic.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -221,13 +221,13 @@ + char *zone, std::size_t zoneChars, + const Fortran::runtime::Descriptor *values) { + if (date) { +- std::memset(date, static_cast(' '), dateChars); ++ Fortran::runtime::memset(date, static_cast(' '), dateChars); + } + if (time) { +- std::memset(time, static_cast(' '), timeChars); ++ Fortran::runtime::memset(time, static_cast(' '), timeChars); + } + if (zone) { +- std::memset(zone, static_cast(' '), zoneChars); ++ Fortran::runtime::memset(zone, static_cast(' '), zoneChars); + } + if (values) { + auto typeCode{values->type().GetCategoryAndKind()}; +@@ -365,7 +365,7 @@ + auto 
copyBufferAndPad{ + [&](char *dest, std::size_t destChars, std::size_t len) { + auto copyLen{std::min(len, destChars)}; +- std::memcpy(dest, buffer, copyLen); ++ Fortran::runtime::memcpy(dest, buffer, copyLen); + for (auto i{copyLen}; i < destChars; ++i) { + dest[i] = ' '; + } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/tools.cpp llvm-project-aso/flang/runtime/tools.cpp +--- llvm-project-aso-orig/flang/runtime/tools.cpp 2024-08-27 20:36:25.244172960 -0500 ++++ llvm-project-aso/flang/runtime/tools.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -28,7 +28,7 @@ + const char *s, std::size_t length, const Terminator &terminator) { + if (s) { + auto *p{static_cast(AllocateMemoryOrCrash(terminator, length + 1))}; +- std::memcpy(p, s, length); ++ Fortran::runtime::memcpy(p, s, length); + p[length] = '\0'; + return OwningPtr{p}; + } else { +@@ -75,10 +75,10 @@ + char *to, std::size_t toLength, const char *from) { + std::size_t len{Fortran::runtime::strlen(from)}; + if (len < toLength) { +- std::memcpy(to, from, len); +- std::memset(to + len, ' ', toLength - len); ++ Fortran::runtime::memcpy(to, from, len); ++ Fortran::runtime::memset(to + len, ' ', toLength - len); + } else { +- std::memcpy(to, from, toLength); ++ Fortran::runtime::memcpy(to, from, toLength); + } + } + +@@ -122,7 +122,7 @@ + std::size_t elementBytes{to.ElementBytes()}; + for (std::size_t n{to.Elements()}; n-- > 0; + to.IncrementSubscripts(toAt), from.IncrementSubscripts(fromAt)) { +- std::memcpy( ++ Fortran::runtime::memcpy( + to.Element(toAt), from.Element(fromAt), elementBytes); + } + } +@@ -135,7 +135,7 @@ + std::size_t elementBytes{to.ElementBytes()}; + for (std::size_t n{to.Elements()}; n-- > 0; + toAt += elementBytes, from.IncrementSubscripts(fromAt)) { +- std::memcpy(toAt, from.Element(fromAt), elementBytes); ++ Fortran::runtime::memcpy(toAt, from.Element(fromAt), elementBytes); + } + } + +@@ -147,7 +147,7 @@ + std::size_t elementBytes{to.ElementBytes()}; + for (std::size_t n{to.Elements()}; n-- > 0; + to.IncrementSubscripts(toAt), fromAt += elementBytes) { +- std::memcpy(to.Element(toAt), fromAt, elementBytes); ++ Fortran::runtime::memcpy(to.Element(toAt), fromAt, elementBytes); + } + } + +@@ -155,7 +155,7 @@ + bool toIsContiguous, bool fromIsContiguous) { + if (toIsContiguous) { + if (fromIsContiguous) { +- std::memcpy(to.OffsetElement(), from.OffsetElement(), ++ Fortran::runtime::memcpy(to.OffsetElement(), from.OffsetElement(), + to.Elements() * to.ElementBytes()); + } else { + ShallowCopyDiscontiguousToContiguous(to, from); +@@ -177,7 +177,7 @@ + char *str, std::size_t length, Terminator &terminator) { + if (runtime::memchr(str, '\0', length) == nullptr) { + char *newCmd{(char *)AllocateMemoryOrCrash(terminator, length + 1)}; +- std::memcpy(newCmd, str, length); ++ Fortran::runtime::memcpy(newCmd, str, length); + newCmd[length] = '\0'; + return newCmd; + } else { +@@ -209,7 +209,7 @@ + return ToErrmsg(errmsg, StatValueTooShort); + } + +- std::memcpy(value.OffsetElement(offset), rawValue, toCopy); ++ Fortran::runtime::memcpy(value.OffsetElement(offset), rawValue, toCopy); + + if (static_cast(rawValueLength) > toCopy) { + return ToErrmsg(errmsg, StatValueTooShort); +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/tools.h llvm-project-aso/flang/runtime/tools.h +--- llvm-project-aso-orig/flang/runtime/tools.h 2024-08-27 20:36:25.244172960 -0500 ++++ llvm-project-aso/flang/runtime/tools.h 2024-11-23 20:39:47.184175353 -0600 +@@ -521,9 +521,9 @@ + to[j] = static_cast(' '); + } + } else if 
(toChars <= fromChars) { +- std::memcpy(to, from, toChars * sizeof(TO)); ++ Fortran::runtime::memcpy(to, from, toChars * sizeof(TO)); + } else { +- std::memcpy(to, from, std::min(toChars, fromChars) * sizeof(TO)); ++ Fortran::runtime::memcpy(to, from, std::min(toChars, fromChars) * sizeof(TO)); + for (std::size_t j{fromChars}; j < toChars; ++j) { + to[j] = static_cast(' '); + } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/transformational.cpp llvm-project-aso/flang/runtime/transformational.cpp +--- llvm-project-aso-orig/flang/runtime/transformational.cpp 2024-11-23 20:25:26.851275134 -0600 ++++ llvm-project-aso/flang/runtime/transformational.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -114,7 +114,7 @@ + "not yet implemented: CHARACTER(KIND=%d) in EOSHIFT intrinsic", kind); + } + } else { +- std::memset(result.raw().base_addr, 0, bytes); ++ Fortran::runtime::memset(result.raw().base_addr, 0, bytes); + } + } + +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/unit.cpp llvm-project-aso/flang/runtime/unit.cpp +--- llvm-project-aso-orig/flang/runtime/unit.cpp 2024-08-27 20:36:25.244172960 -0500 ++++ llvm-project-aso/flang/runtime/unit.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -90,11 +90,11 @@ + CheckDirectAccess(handler); + WriteFrame(frameOffsetInFile_, recordOffsetInFrame_ + furthestAfter, handler); + if (positionInRecord > furthestPositionInRecord) { +- std::memset(Frame() + recordOffsetInFrame_ + furthestPositionInRecord, ' ', ++ Fortran::runtime::memset(Frame() + recordOffsetInFrame_ + furthestPositionInRecord, ' ', + positionInRecord - furthestPositionInRecord); + } + char *to{Frame() + recordOffsetInFrame_ + positionInRecord}; +- std::memcpy(to, data, bytes); ++ Fortran::runtime::memcpy(to, data, bytes); + if (swapEndianness_) { + SwapEndianness(to, bytes, elementBytes); + } +@@ -119,7 +119,7 @@ + auto need{recordOffsetInFrame_ + furthestAfter}; + auto got{ReadFrame(frameOffsetInFile_, need, handler)}; + if (got >= need) { +- std::memcpy(data, Frame() + recordOffsetInFrame_ + positionInRecord, bytes); ++ Fortran::runtime::memcpy(data, Frame() + recordOffsetInFrame_ + positionInRecord, bytes); + if (swapEndianness_) { + SwapEndianness(data, bytes, elementBytes); + } +@@ -303,7 +303,7 @@ + // Pad remainder of fixed length record + WriteFrame( + frameOffsetInFile_, recordOffsetInFrame_ + *openRecl, handler); +- std::memset(Frame() + recordOffsetInFrame_ + furthestPositionInRecord, ++ Fortran::runtime::memset(Frame() + recordOffsetInFrame_ + furthestPositionInRecord, + isUnformatted.value_or(false) ? 
0 : ' ', + *openRecl - furthestPositionInRecord); + furthestPositionInRecord = *openRecl; +@@ -778,7 +778,7 @@ + std::int32_t ExternalFileUnit::ReadHeaderOrFooter(std::int64_t frameOffset) { + std::int32_t word; + char *wordPtr{reinterpret_cast(&word)}; +- std::memcpy(wordPtr, Frame() + frameOffset, sizeof word); ++ Fortran::runtime::memcpy(wordPtr, Frame() + frameOffset, sizeof word); + if (swapEndianness_) { + SwapEndianness(wordPtr, sizeof word, sizeof word); + } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/runtime/unit-map.cpp llvm-project-aso/flang/runtime/unit-map.cpp +--- llvm-project-aso-orig/flang/runtime/unit-map.cpp 2024-08-27 20:36:25.244172960 -0500 ++++ llvm-project-aso/flang/runtime/unit-map.cpp 2024-11-23 20:39:47.184175353 -0600 +@@ -118,7 +118,7 @@ + for (int j{0}; j < buckets_; ++j) { + for (Chain *p{bucket_[j].get()}; p; p = p->next.get()) { + if (p->unit.path() && p->unit.pathLength() == pathLen && +- std::memcmp(p->unit.path(), path, pathLen) == 0) { ++ Fortran::runtime::memcmp(p->unit.path(), path, pathLen) == 0) { + return &p->unit; + } + } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Driver/bbc-openmp-version-macro.f90 llvm-project-aso/flang/test/Driver/bbc-openmp-version-macro.f90 +--- llvm-project-aso-orig/flang/test/Driver/bbc-openmp-version-macro.f90 2024-08-27 20:36:25.248172920 -0500 ++++ llvm-project-aso/flang/test/Driver/bbc-openmp-version-macro.f90 2024-11-23 20:39:47.184175353 -0600 +@@ -13,7 +13,7 @@ + ! RUN: bbc -fopenmp -fopenmp-version=51 -o - %s | FileCheck %s --check-prefix=OPENMP-VERSION-51 + ! RUN: bbc -fopenmp -fopenmp-version=52 -o - %s | FileCheck %s --check-prefix=OPENMP-VERSION-52 + +-! DEFAULT-OPENMP-VERSION: {{.*}} = arith.constant 199911 : i32 ++! DEFAULT-OPENMP-VERSION: {{.*}} = arith.constant 202111 : i32 + ! OPENMP-VERSION-11: {{.*}} = arith.constant 199911 : i32 + ! OPENMP-VERSION-20: {{.*}} = arith.constant 200011 : i32 + ! OPENMP-VERSION-25: {{.*}} = arith.constant 200505 : i32 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Driver/fdefault.f90 llvm-project-aso/flang/test/Driver/fdefault.f90 +--- llvm-project-aso-orig/flang/test/Driver/fdefault.f90 2024-08-27 20:36:25.248172920 -0500 ++++ llvm-project-aso/flang/test/Driver/fdefault.f90 2024-11-23 20:40:34.720003838 -0600 +@@ -24,6 +24,31 @@ + ! RUN: not %flang_fc1 -fsyntax-only -fdefault-double-8 %s 2>&1 | FileCheck %s --check-prefix=ERROR + + ! NOOPTION: integer(4),parameter::real_kind=4_4 ++! TODO: Add checks when actual codegen is possible for this family ++ ++!-------------------------- ++! FLANG DRIVER (flang-new) ++!-------------------------- ++! RUN: rm -rf %t/dir-flang-new && mkdir -p %t/dir-flang-new && %flang -fsyntax-only -module-dir %t/dir-flang-new %s 2>&1 ++! RUN: cat %t/dir-flang-new/m.mod | FileCheck %s --check-prefix=NOOPTION ++! RUN: rm -rf %t/dir-flang-new && mkdir -p %t/dir-flang-new && %flang -fsyntax-only -fdefault-real-8 -module-dir %t/dir-flang-new %s 2>&1 ++! RUN: cat %t/dir-flang-new/m.mod | FileCheck %s --check-prefix=REAL8 ++! RUN: rm -rf %t/dir-flang-new && mkdir -p %t/dir-flang-new && %flang -fsyntax-only -fdefault-real-8 -fdefault-double-8 -module-dir %t/dir-flang-new %s 2>&1 ++! RUN: cat %t/dir-flang-new/m.mod | FileCheck %s --check-prefix=DOUBLE8 ++! RUN: not %flang -fsyntax-only -fdefault-double-8 %s 2>&1 | FileCheck %s --check-prefix=ERROR ++ ++!----------------------------------------- ++! FRONTEND FLANG DRIVER (flang-new -fc1) ++!----------------------------------------- ++! 
RUN: rm -rf %t/dir-flang-new && mkdir -p %t/dir-flang-new && %flang_fc1 -fsyntax-only -module-dir %t/dir-flang-new %s 2>&1 ++! RUN: cat %t/dir-flang-new/m.mod | FileCheck %s --check-prefix=NOOPTION ++! RUN: rm -rf %t/dir-flang-new && mkdir -p %t/dir-flang-new && %flang_fc1 -fsyntax-only -fdefault-real-8 -module-dir %t/dir-flang-new %s 2>&1 ++! RUN: cat %t/dir-flang-new/m.mod | FileCheck %s --check-prefix=REAL8 ++! RUN: rm -rf %t/dir-flang-new && mkdir -p %t/dir-flang-new && %flang_fc1 -fsyntax-only -fdefault-real-8 -fdefault-double-8 -module-dir %t/dir-flang-new %s 2>&1 ++! RUN: cat %t/dir-flang-new/m.mod | FileCheck %s --check-prefix=DOUBLE8 ++! RUN: not %flang_fc1 -fsyntax-only -fdefault-double-8 %s 2>&1 | FileCheck %s --check-prefix=ERROR ++ ++! NOOPTION: integer(4),parameter::real_kind=4_4 + ! NOOPTION-NEXT: intrinsic::kind + ! NOOPTION-NEXT: integer(4),parameter::double_kind=8_4 + +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Driver/flang-openmp-version-macro.f90 llvm-project-aso/flang/test/Driver/flang-openmp-version-macro.f90 +--- llvm-project-aso-orig/flang/test/Driver/flang-openmp-version-macro.f90 2024-08-27 20:36:25.248172920 -0500 ++++ llvm-project-aso/flang/test/Driver/flang-openmp-version-macro.f90 2024-11-23 20:39:47.184175353 -0600 +@@ -13,7 +13,7 @@ + ! RUN: %flang_fc1 -fopenmp -fopenmp-version=51 -cpp -E %s | FileCheck %s --check-prefix=OPENMP-VERSION-51 + ! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -cpp -E %s | FileCheck %s --check-prefix=OPENMP-VERSION-52 + +-! DEFAULT-OPENMP-VERSION: integer :: var1 = 199911 ++! DEFAULT-OPENMP-VERSION: integer :: var1 = 202111 + ! OPENMP-VERSION-11: integer :: var1 = 199911 + ! OPENMP-VERSION-20: integer :: var1 = 200011 + ! OPENMP-VERSION-25: integer :: var1 = 200505 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir llvm-project-aso/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir +--- llvm-project-aso-orig/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir 2024-11-23 20:25:26.851275134 -0600 ++++ llvm-project-aso/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir 2024-11-23 20:39:47.188175337 -0600 +@@ -1022,7 +1022,7 @@ + %8 = fir.load %4 : !fir.ref + %9 = arith.addi %8, %c20_i32 : i32 + fir.store %9 to %7 : !fir.ref +- omp.terminator ++ omp.terminator + } + return + } +@@ -1059,7 +1059,7 @@ + %9 = fir.load %arg0 : !fir.ref + %10 = arith.muli %9, %c10_i32 : i32 + fir.store %10 to %arg1 : !fir.ref +- omp.terminator ++ omp.terminator } return } -@@ -536,6 +539,7 @@ - // CHECK: ^bb3: - // CHECK: omp.yield - // CHECK: } -+// CHECK: omp.terminator - // CHECK: } - // CHECK: llvm.return - // CHECK: } -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Integration/OpenMP/target-filtering.f90 llvm-project/flang/test/Integration/OpenMP/target-filtering.f90 ---- llvm-project.orig/flang/test/Integration/OpenMP/target-filtering.f90 2024-02-15 09:48:32.619800701 -0600 -+++ llvm-project/flang/test/Integration/OpenMP/target-filtering.f90 2024-06-12 10:44:09.351614239 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90 llvm-project-aso/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90 +--- llvm-project-aso-orig/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90 2024-10-18 17:40:32.532992003 -0500 ++++ llvm-project-aso/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90 2024-11-23 20:39:47.188175337 -0600 +@@ -72,80 +72,80 @@ + ! 
CHECK-NEXT: br label %omp.private.copy + + ! CHECK: omp.private.copy: ; preds = %omp.region.cont +-! CHECK-NEXT: br label %omp.private.copy10 ++! CHECK-NEXT: br label %omp.private.copy11 + +-! CHECK: omp.private.copy10: ; preds = %omp.private.copy ++! CHECK: omp.private.copy11: ; preds = %omp.private.copy + ! [begin firstprivate copy for first var] + ! [read the length, is it non-zero?] +-! CHECK: br i1 %{{.*}}, label %omp.private.copy11, label %omp.private.copy12 ++! CHECK: br i1 %{{.*}}, label %omp.private.copy12, label %omp.private.copy13 + +-! CHECK: omp.private.copy12: ; preds = %omp.private.copy11, %omp.private.copy10 +-! CHECK-NEXT: br label %omp.region.cont9 ++! CHECK: omp.private.copy13: ; preds = %omp.private.copy12, %omp.private.copy11 ++! CHECK-NEXT: br label %omp.region.cont10 + +-! CHECK: omp.region.cont9: ; preds = %omp.private.copy12 ++! CHECK: omp.region.cont10: ; preds = %omp.private.copy13 + ! CHECK-NEXT: %{{.*}} = phi ptr +-! CHECK-NEXT: br label %omp.private.copy14 ++! CHECK-NEXT: br label %omp.private.copy15 + +-! CHECK: omp.private.copy14: ; preds = %omp.region.cont9 ++! CHECK: omp.private.copy15: ; preds = %omp.region.cont10 + ! [begin firstprivate copy for second var] + ! [read the length, is it non-zero?] +-! CHECK: br i1 %{{.*}}, label %omp.private.copy15, label %omp.private.copy16 ++! CHECK: br i1 %{{.*}}, label %omp.private.copy16, label %omp.private.copy17 + +-! CHECK: omp.private.copy16: ; preds = %omp.private.copy15, %omp.private.copy14 +-! CHECK-NEXT: br label %omp.region.cont13 ++! CHECK: omp.private.copy17: ; preds = %omp.private.copy16, %omp.private.copy15 ++! CHECK-NEXT: br label %omp.region.cont14 + +-! CHECK: omp.region.cont13: ; preds = %omp.private.copy16 ++! CHECK: omp.region.cont14: ; preds = %omp.private.copy17 + ! CHECK-NEXT: %{{.*}} = phi ptr + ! CHECK-NEXT: br label %omp.reduction.init + +-! CHECK: omp.reduction.init: ; preds = %omp.region.cont13 ++! CHECK: omp.reduction.init: ; preds = %omp.region.cont14 + ! [deffered stores for results of reduction alloc regions] + ! CHECK: br label %[[VAL_96:.*]] + + ! CHECK: omp.reduction.neutral: ; preds = %omp.reduction.init + ! [start of reduction initialization region] + ! [null check:] +-! CHECK: br i1 %{{.*}}, label %omp.reduction.neutral18, label %omp.reduction.neutral19 ++! CHECK: br i1 %{{.*}}, label %omp.reduction.neutral19, label %omp.reduction.neutral20 + +-! CHECK: omp.reduction.neutral19: ; preds = %omp.reduction.neutral ++! CHECK: omp.reduction.neutral20: ; preds = %omp.reduction.neutral + ! [malloc and assign the default value to the reduction variable] +-! CHECK: br label %omp.reduction.neutral20 ++! CHECK: br label %omp.reduction.neutral21 + +-! CHECK: omp.reduction.neutral20: ; preds = %omp.reduction.neutral18, %omp.reduction.neutral19 +-! CHECK-NEXT: br label %omp.region.cont17 ++! CHECK: omp.reduction.neutral21: ; preds = %omp.reduction.neutral19, %omp.reduction.neutral20 ++! CHECK-NEXT: br label %omp.region.cont18 + +-! CHECK: omp.region.cont17: ; preds = %omp.reduction.neutral20 ++! CHECK: omp.region.cont18: ; preds = %omp.reduction.neutral21 + ! CHECK-NEXT: %{{.*}} = phi ptr +-! CHECK-NEXT: br label %omp.reduction.neutral22 ++! CHECK-NEXT: br label %omp.reduction.neutral23 + +-! CHECK: omp.reduction.neutral22: ; preds = %omp.region.cont17 ++! CHECK: omp.reduction.neutral23: ; preds = %omp.region.cont18 + ! [start of reduction initialization region] + ! [null check:] +-! CHECK: br i1 %{{.*}}, label %omp.reduction.neutral23, label %omp.reduction.neutral24 ++! 
CHECK: br i1 %{{.*}}, label %omp.reduction.neutral24, label %omp.reduction.neutral25 + +-! CHECK: omp.reduction.neutral24: ; preds = %omp.reduction.neutral22 ++! CHECK: omp.reduction.neutral25: ; preds = %omp.reduction.neutral23 + ! [malloc and assign the default value to the reduction variable] +-! CHECK: br label %omp.reduction.neutral25 ++! CHECK: br label %omp.reduction.neutral26 + +-! CHECK: omp.reduction.neutral25: ; preds = %omp.reduction.neutral23, %omp.reduction.neutral24 +-! CHECK-NEXT: br label %omp.region.cont21 ++! CHECK: omp.reduction.neutral26: ; preds = %omp.reduction.neutral24, %omp.reduction.neutral25 ++! CHECK-NEXT: br label %omp.region.cont22 + +-! CHECK: omp.region.cont21: ; preds = %omp.reduction.neutral25 ++! CHECK: omp.region.cont22: ; preds = %omp.reduction.neutral26 + ! CHECK-NEXT: %{{.*}} = phi ptr + ! CHECK-NEXT: br label %omp.par.region + +-! CHECK: omp.par.region: ; preds = %omp.region.cont21 +-! CHECK-NEXT: br label %omp.par.region27 ++! CHECK: omp.par.region: ; preds = %omp.region.cont22 ++! CHECK-NEXT: br label %omp.par.region28 + +-! CHECK: omp.par.region27: ; preds = %omp.par.region ++! CHECK: omp.par.region28: ; preds = %omp.par.region + ! [call SUM runtime function] + ! [if (sum(a) == 1)] +-! CHECK: br i1 %{{.*}}, label %omp.par.region28, label %omp.par.region29 ++! CHECK: br i1 %{{.*}}, label %omp.par.region29, label %omp.par.region30 + +-! CHECK: omp.par.region29: ; preds = %omp.par.region27 +-! CHECK-NEXT: br label %omp.region.cont26 ++! CHECK: omp.par.region30: ; preds = %omp.par.region28 ++! CHECK-NEXT: br label %omp.region.cont27 + +-! CHECK: omp.region.cont26: ; preds = %omp.par.region28, %omp.par.region29 ++! CHECK: omp.region.cont27: ; preds = %omp.par.region29, %omp.par.region30 + ! [omp parallel region done, call into the runtime to complete reduction] + ! CHECK: %[[VAL_233:.*]] = call i32 @__kmpc_reduce( + ! CHECK: switch i32 %[[VAL_233]], label %reduce.finalize [ +@@ -153,16 +153,16 @@ + ! CHECK-NEXT: i32 2, label %reduce.switch.atomic + ! CHECK-NEXT: ] + +-! CHECK: reduce.switch.atomic: ; preds = %omp.region.cont26 ++! CHECK: reduce.switch.atomic: ; preds = %omp.region.cont27 + ! CHECK-NEXT: unreachable + +-! CHECK: reduce.switch.nonatomic: ; preds = %omp.region.cont26 ++! CHECK: reduce.switch.nonatomic: ; preds = %omp.region.cont27 + ! CHECK-NEXT: %[[red_private_value_0:.*]] = load ptr, ptr %{{.*}}, align 8 + ! CHECK-NEXT: br label %omp.reduction.nonatomic.body + + ! [various blocks implementing the reduction] + +-! CHECK: omp.region.cont35: ; preds = ++! CHECK: omp.region.cont36: ; preds = + ! CHECK-NEXT: %{{.*}} = phi ptr + ! CHECK-NEXT: call void @__kmpc_end_reduce( + ! CHECK-NEXT: br label %reduce.finalize +@@ -176,79 +176,79 @@ + + ! CHECK: omp.reduction.cleanup: ; preds = %omp.par.pre_finalize + ! [null check] +-! CHECK: br i1 %{{.*}}, label %omp.reduction.cleanup41, label %omp.reduction.cleanup42 ++! CHECK: br i1 %{{.*}}, label %omp.reduction.cleanup42, label %omp.reduction.cleanup43 + +-! CHECK: omp.reduction.cleanup42: ; preds = %omp.reduction.cleanup41, %omp.reduction.cleanup +-! CHECK-NEXT: br label %omp.region.cont40 ++! CHECK: omp.reduction.cleanup43: ; preds = %omp.reduction.cleanup42, %omp.reduction.cleanup ++! CHECK-NEXT: br label %omp.region.cont41 + +-! CHECK: omp.region.cont40: ; preds = %omp.reduction.cleanup42 ++! CHECK: omp.region.cont41: ; preds = %omp.reduction.cleanup43 + ! CHECK-NEXT: %{{.*}} = load ptr, ptr +-! CHECK-NEXT: br label %omp.reduction.cleanup44 ++! CHECK-NEXT: br label %omp.reduction.cleanup45 + +-! 
CHECK: omp.reduction.cleanup44: ; preds = %omp.region.cont40 ++! CHECK: omp.reduction.cleanup45: ; preds = %omp.region.cont41 + ! [null check] +-! CHECK: br i1 %{{.*}}, label %omp.reduction.cleanup45, label %omp.reduction.cleanup46 ++! CHECK: br i1 %{{.*}}, label %omp.reduction.cleanup46, label %omp.reduction.cleanup47 + +-! CHECK: omp.reduction.cleanup46: ; preds = %omp.reduction.cleanup45, %omp.reduction.cleanup44 +-! CHECK-NEXT: br label %omp.region.cont43 ++! CHECK: omp.reduction.cleanup47: ; preds = %omp.reduction.cleanup46, %omp.reduction.cleanup45 ++! CHECK-NEXT: br label %omp.region.cont44 + +-! CHECK: omp.region.cont43: ; preds = %omp.reduction.cleanup46 ++! CHECK: omp.region.cont44: ; preds = %omp.reduction.cleanup47 + ! CHECK-NEXT: br label %omp.private.dealloc + +-! CHECK: omp.private.dealloc: ; preds = %omp.region.cont43 ++! CHECK: omp.private.dealloc: ; preds = %omp.region.cont44 + ! [null check] +-! CHECK: br i1 %{{.*}}, label %omp.private.dealloc48, label %omp.private.dealloc49 ++! CHECK: br i1 %{{.*}}, label %omp.private.dealloc49, label %omp.private.dealloc50 + +-! CHECK: omp.private.dealloc49: ; preds = %omp.private.dealloc48, %omp.private.dealloc +-! CHECK-NEXT: br label %omp.region.cont47 ++! CHECK: omp.private.dealloc50: ; preds = %omp.private.dealloc49, %omp.private.dealloc ++! CHECK-NEXT: br label %omp.region.cont48 + +-! CHECK: omp.region.cont47: ; preds = %omp.private.dealloc49 +-! CHECK-NEXT: br label %omp.private.dealloc51 ++! CHECK: omp.region.cont48: ; preds = %omp.private.dealloc50 ++! CHECK-NEXT: br label %omp.private.dealloc52 + +-! CHECK: omp.private.dealloc51: ; preds = %omp.region.cont47 ++! CHECK: omp.private.dealloc52: ; preds = %omp.region.cont48 + ! [null check] +-! CHECK: br i1 %{{.*}}, label %omp.private.dealloc52, label %omp.private.dealloc53 ++! CHECK: br i1 %{{.*}}, label %omp.private.dealloc53, label %omp.private.dealloc54 + +-! CHECK: omp.private.dealloc53: ; preds = %omp.private.dealloc52, %omp.private.dealloc51 +-! CHECK-NEXT: br label %omp.region.cont50 ++! CHECK: omp.private.dealloc54: ; preds = %omp.private.dealloc53, %omp.private.dealloc52 ++! CHECK-NEXT: br label %omp.region.cont51 + +-! CHECK: omp.region.cont50: ; preds = %omp.private.dealloc53 ++! CHECK: omp.region.cont51: ; preds = %omp.private.dealloc54 + ! CHECK-NEXT: br label %omp.par.outlined.exit.exitStub + +-! CHECK: omp.private.dealloc52: ; preds = %omp.private.dealloc51 ++! CHECK: omp.private.dealloc53: ; preds = %omp.private.dealloc52 + ! [dealloc memory] +-! CHECK: br label %omp.private.dealloc53 ++! CHECK: br label %omp.private.dealloc54 + +-! CHECK: omp.private.dealloc48: ; preds = %omp.private.dealloc ++! CHECK: omp.private.dealloc49: ; preds = %omp.private.dealloc + ! [dealloc memory] +-! CHECK: br label %omp.private.dealloc49 ++! CHECK: br label %omp.private.dealloc50 + +-! CHECK: omp.reduction.cleanup45: ; preds = %omp.reduction.cleanup44 ++! CHECK: omp.reduction.cleanup46: ; preds = %omp.reduction.cleanup45 + ! CHECK-NEXT: call void @free( +-! CHECK-NEXT: br label %omp.reduction.cleanup46 ++! CHECK-NEXT: br label %omp.reduction.cleanup47 + +-! CHECK: omp.reduction.cleanup41: ; preds = %omp.reduction.cleanup ++! CHECK: omp.reduction.cleanup42: ; preds = %omp.reduction.cleanup + ! CHECK-NEXT: call void @free( +-! CHECK-NEXT: br label %omp.reduction.cleanup42 ++! CHECK-NEXT: br label %omp.reduction.cleanup43 + +-! CHECK: omp.par.region28: ; preds = %omp.par.region27 ++! CHECK: omp.par.region29: ; preds = %omp.par.region28 + ! 
CHECK-NEXT: call {} @_FortranAStopStatement + +-! CHECK: omp.reduction.neutral23: ; preds = %omp.reduction.neutral22 ++! CHECK: omp.reduction.neutral24: ; preds = %omp.reduction.neutral23 + ! [source length was zero: finish initializing array] +-! CHECK: br label %omp.reduction.neutral25 ++! CHECK: br label %omp.reduction.neutral26 + +-! CHECK: omp.reduction.neutral18: ; preds = %omp.reduction.neutral ++! CHECK: omp.reduction.neutral19: ; preds = %omp.reduction.neutral + ! [source length was zero: finish initializing array] +-! CHECK: br label %omp.reduction.neutral20 ++! CHECK: br label %omp.reduction.neutral21 + +-! CHECK: omp.private.copy15: ; preds = %omp.private.copy14 ++! CHECK: omp.private.copy16: ; preds = %omp.private.copy15 + ! [source length was non-zero: call assign runtime] +-! CHECK: br label %omp.private.copy16 ++! CHECK: br label %omp.private.copy17 + +-! CHECK: omp.private.copy11: ; preds = %omp.private.copy10 ++! CHECK: omp.private.copy12: ; preds = %omp.private.copy11 + ! [source length was non-zero: call assign runtime] +-! CHECK: br label %omp.private.copy12 ++! CHECK: br label %omp.private.copy13 + + ! CHECK: omp.private.alloc1: ; preds = %omp.private.alloc + ! [var extent was non-zero: malloc a private array] +@@ -258,5 +258,5 @@ + ! [var extent was non-zero: malloc a private array] + ! CHECK: br label %omp.private.alloc8 + +-! CHECK: omp.par.outlined.exit.exitStub: ; preds = %omp.region.cont50 ++! CHECK: omp.par.outlined.exit.exitStub: ; preds = %omp.region.cont51 + ! CHECK-NEXT: ret void +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Integration/OpenMP/target-filtering.f90 llvm-project-aso/flang/test/Integration/OpenMP/target-filtering.f90 +--- llvm-project-aso-orig/flang/test/Integration/OpenMP/target-filtering.f90 2024-08-27 20:36:25.268172720 -0500 ++++ llvm-project-aso/flang/test/Integration/OpenMP/target-filtering.f90 2024-11-23 20:39:47.188175337 -0600 @@ -7,7 +7,7 @@ !===----------------------------------------------------------------------===! @@ -7448,1776 +8732,395 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Integration/OpenMP/ta !HOST: define {{.*}}@{{.*}}before{{.*}}( !DEVICE-NOT: define {{.*}}@before{{.*}}( -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/FIR/mismatched-bound-types.f90 llvm-project/flang/test/Lower/OpenMP/FIR/mismatched-bound-types.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/FIR/mismatched-bound-types.f90 1969-12-31 18:00:00.000000000 -0600 -+++ llvm-project/flang/test/Lower/OpenMP/FIR/mismatched-bound-types.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -0,0 +1,23 @@ -+! 
RUN: %flang_fc1 -fopenmp -emit-fir %s -o - | FileCheck %s +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenACC/acc-enter-data.f90 llvm-project-aso/flang/test/Lower/OpenACC/acc-enter-data.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenACC/acc-enter-data.f90 2024-08-27 20:36:25.288172520 -0500 ++++ llvm-project-aso/flang/test/Lower/OpenACC/acc-enter-data.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -808,10 +808,10 @@ + !CHECK: %[[VAL_42:.*]] = arith.constant 1 : index + !CHECK: %[[VAL_43:.*]] = arith.constant 1 : index + !CHECK: %[[VAL_44:.*]] = arith.subi %[[VAL_43]], %[[VAL_38]]#0 : index +-!CHECK: %[[VAL_45:.*]] = acc.bounds lowerbound(%[[VAL_44]] : index) upperbound(%[[VAL_44]] : index) extent(%[[VAL_42]] : index) stride(%[[VAL_42]] : index) startIdx(%[[VAL_38]]#0 : index) ++!CHECK: %[[VAL_45:.*]] = acc.bounds lowerbound(%[[VAL_44]] : index) upperbound(%[[VAL_44]] : index) extent(%[[VAL_38]]#1 : index) stride(%[[VAL_42]] : index) startIdx(%[[VAL_38]]#0 : index) + !CHECK: %[[VAL_46:.*]] = arith.constant 2 : index + !CHECK: %[[VAL_47:.*]] = arith.subi %[[VAL_46]], %[[VAL_40]]#0 : index +-!CHECK: %[[VAL_48:.*]] = acc.bounds lowerbound(%[[VAL_47]] : index) upperbound(%[[VAL_47]] : index) extent(%[[VAL_42]] : index) stride(%[[VAL_42]] : index) startIdx(%[[VAL_40]]#0 : index) ++!CHECK: %[[VAL_48:.*]] = acc.bounds lowerbound(%[[VAL_47]] : index) upperbound(%[[VAL_47]] : index) extent(%[[VAL_40]]#1 : index) stride(%[[VAL_42]] : index) startIdx(%[[VAL_40]]#0 : index) + !CHECK: %[[CREATE:.*]] = acc.create varPtr(%[[VAL_41]] : !fir.heap>) bounds(%[[VAL_45]], %[[VAL_48]]) -> !fir.heap> {name = "e(2_8)%a(1,2)", structured = false} + !CHECK: acc.enter_data dataOperands(%[[CREATE]] : !fir.heap>) + +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/array-bounds.f90 llvm-project-aso/flang/test/Lower/OpenMP/array-bounds.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/array-bounds.f90 2024-11-23 20:25:26.851275134 -0600 ++++ llvm-project-aso/flang/test/Lower/OpenMP/array-bounds.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -65,13 +65,15 @@ + end subroutine assumed_shape_array + + ++ ++ + !HOST-LABEL: func.func @_QMassumed_array_routinesPassumed_size_array( + !HOST-SAME: %[[ARG0:.*]]: !fir.ref> {fir.bindc_name = "arr_read_write"}) { + !HOST: %[[ARG0_SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1> + !HOST: %[[ARG0_DECL:.*]]:2 = hlfir.declare %[[ARG0]](%[[ARG0_SHAPE]]) dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QMassumed_array_routinesFassumed_size_arrayEarr_read_write"} : (!fir.ref>, !fir.shape<1>, !fir.dscope) -> (!fir.box>, !fir.ref>) + !HOST: %[[ALLOCA:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QMassumed_array_routinesFassumed_size_arrayEi"} + !HOST: %[[DIMS0:.*]]:3 = fir.box_dims %[[ARG0_DECL]]#0, %c0{{.*}} : (!fir.box>, index) -> (index, index, index) +-!HOST: %[[C4_1:.*]] = arith.subi %c4, %c1{{.*}} : index ++!HOST: %[[C4_1:.*]] = arith.subi %c4{{.*}}, %c1{{.*}} : index + !HOST: %[[EXT:.*]] = arith.addi %[[C4_1]], %c1{{.*}} : index + !HOST: %[[BOUNDS:.*]] = omp.map.bounds lower_bound(%c1{{.*}} : index) upper_bound(%c4{{.*}} : index) extent(%[[EXT]] : index) stride(%[[DIMS0]]#2 : index) start_idx(%c1{{.*}} : index) {stride_in_bytes = true} + !HOST: %[[MAP:.*]] = omp.map.info var_ptr(%[[ARG0_DECL]]#1 : !fir.ref>, i32) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "arr_read_write(2:5)"} +diff -Naur -x .git -x '*.pyc' 
llvm-project-aso-orig/flang/test/Lower/OpenMP/eval-outside-target.f90 llvm-project-aso/flang/test/Lower/OpenMP/eval-outside-target.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/eval-outside-target.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Lower/OpenMP/eval-outside-target.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,157 @@ ++! The "thread_limit" clause was added to the "target" construct in OpenMP 5.1. ++! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 %s -o - | FileCheck %s --check-prefixes=BOTH,HOST ++! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefixes=BOTH,DEVICE ++ ++! BOTH-LABEL: func.func @_QPteams ++subroutine teams() ++ ! BOTH: omp.target ++ ++ ! HOST-SAME: host_eval(%{{.*}} -> %[[NUM_TEAMS:.*]], %{{.*}} -> %[[THREAD_LIMIT:.*]] : i32, i32) ++ ++ ! DEVICE-NOT: host_eval({{.*}}) ++ ! DEVICE-SAME: { ++ !$omp target + -+! Check that this testcase is lowered to FIR successfully. -+! CHECK: omp.target trip_count ++ ! BOTH: omp.teams + -+module Test -+ use, intrinsic :: ISO_Fortran_env, only: REAL64,INT64 -+ implicit none -+ integer(kind=INT64) :: N -+ real(kind=REAL64), allocatable :: A(:) ++ ! HOST-SAME: num_teams( to %[[NUM_TEAMS]] : i32) thread_limit(%[[THREAD_LIMIT]] : i32) ++ ! DEVICE-SAME: num_teams({{.*}}) thread_limit({{.*}}) ++ !$omp teams num_teams(1) thread_limit(2) ++ call foo() ++ !$omp end teams + -+ contains -+ subroutine init_arrays(initA) -+ implicit none -+ real(kind=REAL64), intent(in) :: initA -+ integer(kind=INT64) :: i -+ !$omp target teams distribute parallel do -+ do i = 1, N -+ A(i) = initA -+ end do -+ end subroutine init_arrays ++ !$omp end target + -+end module Test -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/function-filtering-2.f90 llvm-project/flang/test/Lower/OpenMP/function-filtering-2.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/function-filtering-2.f90 2024-06-12 10:43:12.676209906 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/function-filtering-2.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -1,9 +1,9 @@ - ! RUN: %flang_fc1 -fopenmp -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM,LLVM-HOST %s - ! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - | FileCheck --check-prefix=MLIR %s --! RUN: %flang_fc1 -fopenmp -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM,LLVM-DEVICE %s --! RUN: %flang_fc1 -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefix=MLIR %s -+! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM,LLVM-DEVICE %s -+! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefix=MLIR %s - ! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-HOST,MLIR-ALL %s --! RUN: bbc -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s -+! RUN: bbc -target amdgcn-amd-amdhsa -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s - - ! MLIR: func.func @{{.*}}implicit_invocation() attributes {omp.declare_target = #omp.declaretarget} - ! 
MLIR: return -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/function-filtering-3.f90 llvm-project/flang/test/Lower/OpenMP/function-filtering-3.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/function-filtering-3.f90 2024-06-12 10:43:12.676209906 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/function-filtering-3.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -1,9 +1,9 @@ - ! RUN: %flang_fc1 -fopenmp -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-HOST,LLVM-ALL %s - ! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-HOST,MLIR-ALL %s --! RUN: %flang_fc1 -fopenmp -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-DEVICE,LLVM-ALL %s --! RUN: %flang_fc1 -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s -+! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-DEVICE,LLVM-ALL %s -+! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s - ! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-HOST,MLIR-ALL %s --! RUN: bbc -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s -+! RUN: bbc -fopenmp -target amdgcn-amd-amdhsa -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s - - ! Check that the correct LLVM IR functions are kept for the host and device - ! after running the whole set of translation and transformation passes from -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/function-filtering.f90 llvm-project/flang/test/Lower/OpenMP/function-filtering.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/function-filtering.f90 2024-02-15 09:48:32.631800569 -0600 -+++ llvm-project/flang/test/Lower/OpenMP/function-filtering.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -1,9 +1,9 @@ - ! RUN: %flang_fc1 -fopenmp -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-HOST,LLVM-ALL %s - ! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-HOST,MLIR-ALL %s --! RUN: %flang_fc1 -fopenmp -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-DEVICE,LLVM-ALL %s --! RUN: %flang_fc1 -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s -+! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-DEVICE,LLVM-ALL %s -+! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s - ! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-HOST,MLIR-ALL %s --! RUN: bbc -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s -+! RUN: bbc -target amdgcn-amd-amdhsa -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s - - ! Check that the correct LLVM IR functions are kept for the host and device - ! 
after running the whole set of translation and transformation passes from -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/if-clause.f90 llvm-project/flang/test/Lower/OpenMP/if-clause.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/if-clause.f90 2024-06-12 10:43:12.676209906 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/if-clause.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -9,23 +9,191 @@ - integer :: i - - ! TODO When they are supported, add tests for: -- ! - DISTRIBUTE PARALLEL DO -- ! - DISTRIBUTE PARALLEL DO SIMD -- ! - DISTRIBUTE SIMD - ! - PARALLEL SECTIONS - ! - PARALLEL WORKSHARE -- ! - TARGET PARALLEL -- ! - TARGET TEAMS DISTRIBUTE -- ! - TARGET TEAMS DISTRIBUTE PARALLEL DO -- ! - TARGET TEAMS DISTRIBUTE PARALLEL DO SIMD -- ! - TARGET TEAMS DISTRIBUTE SIMD -- ! - TARGET UPDATE - ! - TASKLOOP - ! - TASKLOOP SIMD -- ! - TEAMS DISTRIBUTE -- ! - TEAMS DISTRIBUTE PARALLEL DO -- ! - TEAMS DISTRIBUTE PARALLEL DO SIMD -- ! - TEAMS DISTRIBUTE SIMD ++ ! BOTH: omp.teams ++ ! BOTH-SAME: num_teams({{.*}}) thread_limit({{.*}}) { ++ !$omp teams num_teams(1) thread_limit(2) ++ call foo() ++ !$omp end teams ++end subroutine teams + -+ ! ---------------------------------------------------------------------------- -+ ! DISTRIBUTE PARALLEL DO SIMD -+ ! ---------------------------------------------------------------------------- -+ !$omp teams ++! BOTH-LABEL: func.func @_QPdistribute_parallel_do ++subroutine distribute_parallel_do() ++ ! BOTH: omp.target ++ ++ ! HOST-SAME: host_eval(%{{.*}} -> %[[LB:.*]], %{{.*}} -> %[[UB:.*]], %{{.*}} -> %[[STEP:.*]], %{{.*}} -> %[[NUM_THREADS:.*]] : i32, i32, i32, i32) ++ ++ ! DEVICE-NOT: host_eval({{.*}}) ++ ! DEVICE-SAME: { + -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp distribute parallel do simd -+ do i = 1, 10 -+ end do -+ !$omp end distribute parallel do simd ++ ! BOTH: omp.teams ++ !$omp target teams + -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp distribute parallel do simd if(.true.) -+ do i = 1, 10 -+ end do -+ !$omp end distribute parallel do simd ++ ! BOTH: omp.parallel + -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp distribute parallel do simd if(parallel: .true.) if(simd: .false.) -+ do i = 1, 10 -+ end do -+ !$omp end distribute parallel do simd ++ ! HOST-SAME: num_threads(%[[NUM_THREADS]] : i32) ++ ! DEVICE-SAME: num_threads({{.*}}) + -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp distribute parallel do simd if(parallel: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end distribute parallel do simd -+ -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! 
CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp distribute parallel do simd if(simd: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end distribute parallel do simd -+ -+ !$omp end teams -+ -+ ! ---------------------------------------------------------------------------- -+ ! DISTRIBUTE PARALLEL DO -+ ! ---------------------------------------------------------------------------- -+ !$omp teams -+ -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp distribute parallel do -+ do i = 1, 10 -+ end do -+ !$omp end distribute parallel do ++ ! BOTH: omp.distribute ++ ! BOTH-NEXT: omp.wsloop ++ ! BOTH-NEXT: omp.loop_nest + -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp distribute parallel do if(.true.) -+ do i = 1, 10 ++ ! HOST-SAME: (%{{.*}}) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) ++ !$omp distribute parallel do num_threads(1) ++ do i=1,10 ++ call foo() + end do + !$omp end distribute parallel do -+ -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp distribute parallel do if(parallel: .true.) -+ do i = 1, 10 ++ !$omp end target teams ++ ++ ! BOTH: omp.target ++ ! BOTH-NOT: host_eval({{.*}}) ++ ! BOTH-SAME: { ++ ! BOTH: omp.teams ++ !$omp target teams ++ call foo() !< Prevents this from being SPMD. ++ ++ ! BOTH: omp.parallel ++ ! BOTH-SAME: num_threads({{.*}}) ++ ! BOTH: omp.distribute ++ ! BOTH-NEXT: omp.wsloop ++ !$omp distribute parallel do num_threads(1) ++ do i=1,10 ++ call foo() + end do + !$omp end distribute parallel do ++ !$omp end target teams + -+ !$omp end teams -+ -+ ! ---------------------------------------------------------------------------- -+ ! DISTRIBUTE SIMD -+ ! ---------------------------------------------------------------------------- ++ ! BOTH: omp.teams + !$omp teams -+ -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp distribute simd -+ do i = 1, 10 -+ end do -+ !$omp end distribute simd -+ -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp distribute simd if(.true.) -+ do i = 1, 10 -+ end do -+ !$omp end distribute simd -+ -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp distribute simd if(simd: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end distribute simd -+ -+ !$omp end teams - - ! ---------------------------------------------------------------------------- - ! DO SIMD -@@ -33,18 +201,31 @@ - ! CHECK: omp.wsloop - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { - !$omp do simd - do i = 1, 10 - end do - !$omp end do simd - - ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! 
CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp do simd if(.true.) - do i = 1, 10 - end do - !$omp end do simd - - ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp do simd if(simd: .true.) - do i = 1, 10 - end do -@@ -62,12 +243,14 @@ - - ! CHECK: omp.parallel - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp parallel if(.true.) - i = 10 - !$omp end parallel - - ! CHECK: omp.parallel - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp parallel if(parallel: .true.) - i = 10 - !$omp end parallel -@@ -78,6 +261,9 @@ - ! CHECK: omp.parallel - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { - !$omp parallel do - do i = 1, 10 - end do -@@ -85,6 +271,10 @@ - - ! CHECK: omp.parallel - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { - !$omp parallel do if(.true.) - do i = 1, 10 - end do -@@ -92,6 +282,10 @@ - - ! CHECK: omp.parallel - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { - !$omp parallel do if(parallel: .true.) - do i = 1, 10 - end do -@@ -106,6 +300,9 @@ - ! CHECK: omp.wsloop - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { - !$omp parallel do simd - do i = 1, 10 - end do -@@ -113,7 +310,13 @@ - - ! CHECK: omp.parallel - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp parallel do simd if(.true.) - do i = 1, 10 - end do -@@ -121,7 +324,13 @@ - - ! CHECK: omp.parallel - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp parallel do simd if(parallel: .true.) if(simd: .false.) - do i = 1, 10 - end do -@@ -132,6 +341,9 @@ - ! CHECK: omp.wsloop - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { - !$omp parallel do simd if(parallel: .true.) - do i = 1, 10 - end do -@@ -141,6 +353,11 @@ - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { - ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp parallel do simd if(simd: .true.) - do i = 1, 10 - end do -@@ -159,6 +376,7 @@ - - ! CHECK: omp.simd - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp simd if(.true.) - do i = 1, 10 - end do -@@ -166,6 +384,7 @@ - - ! CHECK: omp.simd - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp simd if(simd: .true.) - do i = 1, 10 - end do -@@ -182,11 +401,13 @@ - - ! CHECK: omp.target - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target if(.true.) - !$omp end target - - ! CHECK: omp.target - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target if(target: .true.) - !$omp end target - -@@ -201,11 +422,13 @@ - - ! CHECK: omp.target_data - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target data map(tofrom: i) if(.true.) - !$omp end target data - - ! CHECK: omp.target_data - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target data map(tofrom: i) if(target data: .true.) - !$omp end target data - -@@ -214,7 +437,6 @@ - ! ---------------------------------------------------------------------------- - ! 
CHECK: omp.target_enter_data - ! CHECK-NOT: if({{.*}}) -- ! CHECK-SAME: map - !$omp target enter data map(to: i) - - ! CHECK: omp.target_enter_data -@@ -230,7 +452,6 @@ - ! ---------------------------------------------------------------------------- - ! CHECK: omp.target_exit_data - ! CHECK-NOT: if({{.*}}) -- ! CHECK-SAME: map - !$omp target exit data map(from: i) - - ! CHECK: omp.target_exit_data -@@ -250,6 +471,9 @@ - ! CHECK: omp.parallel - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target parallel do - do i = 1, 10 - end do -@@ -257,8 +481,13 @@ - - ! CHECK: omp.target - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.parallel - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target parallel do if(.true.) - do i = 1, 10 - end do -@@ -266,8 +495,13 @@ - - ! CHECK: omp.target - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.parallel - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target parallel do if(target: .true.) if(parallel: .false.) - do i = 1, 10 - end do -@@ -275,9 +509,13 @@ - - ! CHECK: omp.target - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.parallel - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target parallel do if(target: .true.) - do i = 1, 10 - end do -@@ -288,6 +526,10 @@ - ! CHECK-SAME: { - ! CHECK: omp.parallel - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target parallel do if(parallel: .true.) - do i = 1, 10 - end do -@@ -305,6 +547,9 @@ - ! CHECK: omp.wsloop - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target parallel do simd - do i = 1, 10 - end do -@@ -312,9 +557,16 @@ - - ! CHECK: omp.target - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.parallel - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target parallel do simd if(.true.) - do i = 1, 10 - end do -@@ -322,9 +574,16 @@ - - ! CHECK: omp.target - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.parallel - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target parallel do simd if(target: .true.) if(parallel: .false.) & - !$omp& if(simd: .true.) - do i = 1, 10 -@@ -333,12 +592,16 @@ - - ! CHECK: omp.target - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.parallel - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { - ! CHECK: omp.wsloop - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target parallel do simd if(target: .true.) - do i = 1, 10 - end do -@@ -349,13 +612,72 @@ - ! CHECK-SAME: { - ! CHECK: omp.parallel - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target parallel do simd if(parallel: .true.) if(simd: .false.) - do i = 1, 10 - end do - !$omp end target parallel do simd - - ! 
---------------------------------------------------------------------------- -+ ! TARGET PARALLEL -+ ! ---------------------------------------------------------------------------- -+ ! CHECK: omp.target -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target parallel -+ i = 1 -+ !$omp end target parallel -+ -+ ! CHECK: omp.target -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target parallel if(.true.) -+ i = 1 -+ !$omp end target parallel -+ -+ ! CHECK: omp.target -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target parallel if(target: .true.) if(parallel: .false.) -+ i = 1 -+ !$omp end target parallel -+ -+ ! CHECK: omp.target -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target parallel if(target: .true.) -+ i = 1 -+ !$omp end target parallel -+ -+ ! CHECK: omp.target -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target parallel if(parallel: .true.) -+ i = 1 -+ !$omp end target parallel -+ -+ ! ---------------------------------------------------------------------------- - ! TARGET SIMD - ! ---------------------------------------------------------------------------- - ! CHECK: omp.target -@@ -371,8 +693,10 @@ - - ! CHECK: omp.target - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.simd - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target simd if(.true.) - do i = 1, 10 - end do -@@ -380,8 +704,10 @@ - - ! CHECK: omp.target - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.simd - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target simd if(target: .true.) if(simd: .false.) - do i = 1, 10 - end do -@@ -389,6 +715,7 @@ - - ! CHECK: omp.target - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.simd - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { -@@ -402,14 +729,438 @@ - ! CHECK-SAME: { - ! CHECK: omp.simd - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target simd if(simd: .true.) - do i = 1, 10 - end do - !$omp end target simd - - ! ---------------------------------------------------------------------------- -- ! TARGET TEAMS -+ ! TARGET TEAMS DISTRIBUTE -+ ! ---------------------------------------------------------------------------- -+ ! CHECK: omp.target -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute -+ -+ ! CHECK: omp.target -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute if(.true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute -+ -+ ! CHECK: omp.target -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute if(target: .true.) if(teams: .false.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute -+ -+ ! CHECK: omp.target -+ ! 
CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute if(target: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute -+ -+ ! CHECK: omp.target -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute if(teams: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute -+ -+ ! ---------------------------------------------------------------------------- -+ ! TARGET TEAMS DISTRIBUTE PARALLEL DO -+ ! ---------------------------------------------------------------------------- -+ ! CHECK: omp.target -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute parallel do -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do -+ -+ ! CHECK: omp.target -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute parallel do if(.true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do -+ -+ ! CHECK: omp.target -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute parallel do if(target: .true.) if(teams: .false.) if(parallel: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do -+ -+ ! CHECK: omp.target -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute parallel do if(target: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do -+ -+ ! CHECK: omp.target -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute parallel do if(teams: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do -+ -+ ! CHECK: omp.target -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! 
CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute parallel do if(parallel: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do -+ -+ ! ---------------------------------------------------------------------------- -+ ! TARGET TEAMS DISTRIBUTE PARALLEL DO SIMD -+ ! ---------------------------------------------------------------------------- -+ ! CHECK: omp.target -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute parallel do simd -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do simd -+ -+ ! CHECK: omp.target -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute parallel do simd if(.true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do simd -+ -+ ! CHECK: omp.target -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute parallel do simd if(target: .true.) if(teams: .false.) if(parallel: .true.) if(simd: .false.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do simd -+ -+ ! CHECK: omp.target -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute parallel do simd if(target: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do simd -+ -+ ! CHECK: omp.target -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute parallel do simd if(teams: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do simd -+ -+ ! CHECK: omp.target -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute parallel do simd if(parallel: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do simd -+ -+ ! CHECK: omp.target -+ ! 
CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute parallel do simd if(simd: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do simd -+ -+ ! ---------------------------------------------------------------------------- -+ ! TARGET TEAMS DISTRIBUTE SIMD - ! ---------------------------------------------------------------------------- -+ ! CHECK: omp.target -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute simd -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute simd -+ -+ ! CHECK: omp.target -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute simd if(.true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute simd -+ -+ ! CHECK: omp.target -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute simd if(target: .true.) if(teams: .false.) if(simd: .false.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute simd -+ -+ ! CHECK: omp.target -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute simd if(target: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute simd -+ -+ ! CHECK: omp.target -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute simd if(teams: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute simd - - ! CHECK: omp.target - ! CHECK-NOT: if({{.*}}) -@@ -417,28 +1168,53 @@ - ! CHECK: omp.teams - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp target teams distribute simd if(simd: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute simd -+ -+ ! ---------------------------------------------------------------------------- -+ ! TARGET TEAMS -+ ! ---------------------------------------------------------------------------- -+ ! CHECK: omp.target -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target teams - i = 1 - !$omp end target teams - - ! CHECK: omp.target - ! CHECK-SAME: if({{.*}}) -+ ! 
CHECK-SAME: { - ! CHECK: omp.teams - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target teams if(.true.) - i = 1 - !$omp end target teams - - ! CHECK: omp.target - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.teams - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target teams if(target: .true.) if(teams: .false.) - i = 1 - !$omp end target teams - - ! CHECK: omp.target - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - ! CHECK: omp.teams - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { -@@ -451,11 +1227,28 @@ - ! CHECK-SAME: { - ! CHECK: omp.teams - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp target teams if(teams: .true.) - i = 1 - !$omp end target teams - - ! ---------------------------------------------------------------------------- -+ ! TARGET UPDATE -+ ! ---------------------------------------------------------------------------- -+ -+ ! CHECK: omp.target_update -+ ! CHECK-NOT: if({{.*}}) -+ !$omp target update to(i) -+ -+ ! CHECK: omp.target_update -+ ! CHECK-SAME: if({{.*}}) -+ !$omp target update to(i) if(.true.) -+ -+ ! CHECK: omp.target_update -+ ! CHECK-SAME: if({{.*}}) -+ !$omp target update to(i) if(target update: .true.) -+ -+ ! ---------------------------------------------------------------------------- - ! TASK - ! ---------------------------------------------------------------------------- - ! CHECK: omp.task -@@ -466,15 +1259,336 @@ - - ! CHECK: omp.task - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp task if(.true.) - !$omp end task - - ! CHECK: omp.task - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp task if(task: .true.) - !$omp end task - - ! ---------------------------------------------------------------------------- -+ ! TEAMS DISTRIBUTE -+ ! ---------------------------------------------------------------------------- -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute -+ -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute if(.true.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute -+ -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute if(teams: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute -+ -+ ! ---------------------------------------------------------------------------- -+ ! TEAMS DISTRIBUTE PARALLEL DO -+ ! ---------------------------------------------------------------------------- -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute parallel do -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute parallel do -+ -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute parallel do if(.true.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute parallel do -+ -+ ! CHECK: omp.teams -+ ! 
CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute parallel do if(teams: .true.) if(parallel: .false.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute parallel do -+ -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute parallel do if(teams: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute parallel do -+ -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute parallel do if(parallel: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute parallel do -+ -+ ! ---------------------------------------------------------------------------- -+ ! TEAMS DISTRIBUTE PARALLEL DO SIMD -+ ! ---------------------------------------------------------------------------- -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute parallel do simd -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute parallel do simd -+ -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute parallel do simd if(.true.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute parallel do simd -+ -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute parallel do simd if(teams: .false.) if(parallel: .true.) if(simd: .false.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute parallel do simd -+ -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute parallel do simd if(teams: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute parallel do simd -+ -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! 
CHECK: omp.parallel -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute parallel do simd if(parallel: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute parallel do simd -+ -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.parallel -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.wsloop -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute parallel do simd if(simd: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute parallel do simd -+ -+ ! ---------------------------------------------------------------------------- -+ ! TEAMS DISTRIBUTE SIMD -+ ! ---------------------------------------------------------------------------- -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute simd -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute simd -+ -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute simd if(.true.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute simd -+ -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute simd if(teams: .true.) if(simd: .false.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute simd -+ -+ ! CHECK: omp.teams -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute simd if(teams: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute simd -+ -+ ! CHECK: omp.teams -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.distribute -+ ! CHECK-NOT: if({{.*}}) -+ ! CHECK-SAME: { -+ ! CHECK: omp.simd -+ ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { -+ !$omp teams distribute simd if(simd: .true.) -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute simd -+ -+ ! ---------------------------------------------------------------------------- - ! TEAMS - ! ---------------------------------------------------------------------------- - ! CHECK: omp.teams -@@ -486,12 +1600,14 @@ - - ! CHECK: omp.teams - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp teams if(.true.) - i = 1 - !$omp end teams - - ! CHECK: omp.teams - ! CHECK-SAME: if({{.*}}) -+ ! CHECK-SAME: { - !$omp teams if(teams: .true.) - i = 1 - !$omp end teams -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/loop-combined.f90 llvm-project/flang/test/Lower/OpenMP/loop-combined.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/loop-combined.f90 2024-06-12 10:43:12.676209906 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/loop-combined.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -6,19 +6,51 @@ - program main - integer :: i - -- ! TODO When DISTRIBUTE, TASKLOOP and TEAMS are supported add: -- ! 
- DISTRIBUTE PARALLEL DO SIMD -- ! - DISTRIBUTE PARALLEL DO -- ! - DISTRIBUTE SIMD -- ! - TARGET TEAMS DISTRIBUTE PARALLEL DO SIMD -- ! - TARGET TEAMS DISTRIBUTE PARALLEL DO -- ! - TARGET TEAMS DISTRIBUTE SIMD -- ! - TARGET TEAMS DISTRIBUTE -- ! - TASKLOOP SIMD -- ! - TEAMS DISTRIBUTE PARALLEL DO SIMD -- ! - TEAMS DISTRIBUTE PARALLEL DO -- ! - TEAMS DISTRIBUTE SIMD -- ! - TEAMS DISTRIBUTE -+ ! TODO TASKLOOP SIMD -+ -+ ! ---------------------------------------------------------------------------- -+ ! DISTRIBUTE PARALLEL DO SIMD -+ ! ---------------------------------------------------------------------------- -+ !$omp teams -+ -+ ! CHECK: omp.distribute -+ ! CHECK: omp.parallel -+ ! CHECK: omp.wsloop -+ !$omp distribute parallel do simd -+ do i = 1, 10 -+ end do -+ !$omp end distribute parallel do simd -+ -+ !$omp end teams -+ -+ ! ---------------------------------------------------------------------------- -+ ! DISTRIBUTE PARALLEL DO -+ ! ---------------------------------------------------------------------------- -+ !$omp teams -+ -+ ! CHECK: omp.distribute -+ ! CHECK: omp.parallel -+ ! CHECK: omp.wsloop -+ !$omp distribute parallel do -+ do i = 1, 10 -+ end do -+ !$omp end distribute parallel do -+ -+ !$omp end teams -+ -+ ! ---------------------------------------------------------------------------- -+ ! DISTRIBUTE SIMD -+ ! ---------------------------------------------------------------------------- -+ !$omp teams -+ -+ ! CHECK: omp.distribute -+ ! CHECK: omp.simd -+ !$omp distribute simd -+ do i = 1, 10 -+ end do -+ !$omp end distribute simd -+ -+ !$omp end teams - - ! ---------------------------------------------------------------------------- - ! DO SIMD -@@ -72,6 +104,59 @@ - !$omp end target parallel do - - ! ---------------------------------------------------------------------------- -+ ! TARGET TEAMS DISTRIBUTE PARALLEL DO SIMD -+ ! ---------------------------------------------------------------------------- -+ -+ ! CHECK: omp.target -+ ! CHECK: omp.teams -+ ! CHECK: omp.distribute -+ ! CHECK: omp.parallel -+ ! CHECK: omp.wsloop -+ !$omp target teams distribute parallel do simd -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do simd -+ -+ ! ---------------------------------------------------------------------------- -+ ! TARGET TEAMS DISTRIBUTE PARALLEL DO -+ ! ---------------------------------------------------------------------------- -+ -+ ! CHECK: omp.target -+ ! CHECK: omp.teams -+ ! CHECK: omp.distribute -+ ! CHECK: omp.parallel -+ ! CHECK: omp.wsloop -+ !$omp target teams distribute parallel do -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute parallel do -+ -+ ! ---------------------------------------------------------------------------- -+ ! TARGET TEAMS DISTRIBUTE SIMD -+ ! ---------------------------------------------------------------------------- -+ -+ ! CHECK: omp.target -+ ! CHECK: omp.teams -+ ! CHECK: omp.distribute -+ ! CHECK: omp.simd -+ !$omp target teams distribute simd -+ do i = 1, 10 ++ ++ ! BOTH: omp.parallel ++ ! BOTH-SAME: num_threads({{.*}}) ++ ! BOTH: omp.distribute ++ ! BOTH-NEXT: omp.wsloop ++ !$omp distribute parallel do num_threads(1) ++ do i=1,10 ++ call foo() + end do -+ !$omp end target teams distribute simd ++ !$omp end distribute parallel do ++ !$omp end teams ++end subroutine distribute_parallel_do + -+ ! ---------------------------------------------------------------------------- -+ ! TARGET TEAMS DISTRIBUTE -+ ! ---------------------------------------------------------------------------- ++! 
BOTH-LABEL: func.func @_QPdistribute_parallel_do_simd ++subroutine distribute_parallel_do_simd() ++ ! BOTH: omp.target ++ ++ ! HOST-SAME: host_eval(%{{.*}} -> %[[LB:.*]], %{{.*}} -> %[[UB:.*]], %{{.*}} -> %[[STEP:.*]], %{{.*}} -> %[[NUM_THREADS:.*]] : i32, i32, i32, i32) ++ ++ ! DEVICE-NOT: host_eval({{.*}}) ++ ! DEVICE-SAME: { + -+ ! CHECK: omp.target -+ ! CHECK: omp.teams -+ ! CHECK: omp.distribute -+ !$omp target teams distribute -+ do i = 1, 10 -+ end do -+ !$omp end target teams distribute ++ ! BOTH: omp.teams ++ !$omp target teams + -+ ! ---------------------------------------------------------------------------- - ! TARGET SIMD - ! ---------------------------------------------------------------------------- - ! CHECK: omp.target -@@ -80,4 +165,54 @@ - do i = 1, 10 - end do - !$omp end target simd ++ ! BOTH: omp.parallel + -+ ! ---------------------------------------------------------------------------- -+ ! TEAMS DISTRIBUTE PARALLEL DO SIMD -+ ! ---------------------------------------------------------------------------- ++ ! HOST-SAME: num_threads(%[[NUM_THREADS]] : i32) ++ ! DEVICE-SAME: num_threads({{.*}}) + -+ ! CHECK: omp.teams -+ ! CHECK: omp.distribute -+ ! CHECK: omp.parallel -+ ! CHECK: omp.wsloop -+ !$omp teams distribute parallel do simd -+ do i = 1, 10 ++ ! BOTH: omp.distribute ++ ! BOTH-NEXT: omp.wsloop ++ ! BOTH-NEXT: omp.simd ++ ! BOTH-NEXT: omp.loop_nest ++ ++ ! HOST-SAME: (%{{.*}}) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) ++ !$omp distribute parallel do simd num_threads(1) ++ do i=1,10 ++ call foo() + end do -+ !$omp end teams distribute parallel do simd ++ !$omp end distribute parallel do simd ++ !$omp end target teams ++ ++ ! BOTH: omp.target ++ ! BOTH-NOT: host_eval({{.*}}) ++ ! BOTH-SAME: { ++ ! BOTH: omp.teams ++ !$omp target teams ++ call foo() !< Prevents this from being SPMD. ++ ++ ! BOTH: omp.parallel ++ ! BOTH-SAME: num_threads({{.*}}) ++ ! BOTH: omp.distribute ++ ! BOTH-NEXT: omp.wsloop ++ ! BOTH-NEXT: omp.simd ++ !$omp distribute parallel do simd num_threads(1) ++ do i=1,10 ++ call foo() ++ end do ++ !$omp end distribute parallel do simd ++ !$omp end target teams + -+ ! ---------------------------------------------------------------------------- -+ ! TEAMS DISTRIBUTE PARALLEL DO -+ ! ---------------------------------------------------------------------------- ++ ! BOTH: omp.teams ++ !$omp teams + -+ ! CHECK: omp.teams -+ ! CHECK: omp.distribute -+ ! CHECK: omp.parallel -+ ! CHECK: omp.wsloop -+ !$omp teams distribute parallel do -+ do i = 1, 10 ++ ! BOTH: omp.parallel ++ ! BOTH-SAME: num_threads({{.*}}) ++ ! BOTH: omp.distribute ++ ! BOTH-NEXT: omp.wsloop ++ ! BOTH-NEXT: omp.simd ++ !$omp distribute parallel do simd num_threads(1) ++ do i=1,10 ++ call foo() + end do -+ !$omp end teams distribute parallel do ++ !$omp end distribute parallel do simd ++ !$omp end teams ++end subroutine distribute_parallel_do_simd +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/FIR/mismatched-bound-types.f90 llvm-project-aso/flang/test/Lower/OpenMP/FIR/mismatched-bound-types.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/FIR/mismatched-bound-types.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Lower/OpenMP/FIR/mismatched-bound-types.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,35 @@ ++! RUN: %flang_fc1 -fopenmp -emit-fir %s -o - | FileCheck %s + -+ ! ---------------------------------------------------------------------------- -+ ! TEAMS DISTRIBUTE SIMD -+ ! 
---------------------------------------------------------------------------- ++! Check that this testcase is lowered to FIR successfully. + -+ ! CHECK: omp.teams -+ ! CHECK: omp.distribute -+ ! CHECK: omp.simd -+ !$omp teams distribute simd -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute simd ++! CHECK: %[[ONE:.*]] = arith.constant 1 : i32 ++! CHECK: %[[DECL_N:.*]] = fir.declare %{{.*}} {uniq_name = "_QMtestEn"} : (!fir.ref) -> !fir.ref ++! CHECK: %[[HOST_N:.*]] = fir.load %[[DECL_N]] : !fir.ref ++! CHECK: %[[HOST_LB:.*]] = fir.convert %[[ONE]] : (i32) -> i64 ++! CHECK: %[[HOST_STEP:.*]] = fir.convert %[[ONE]] : (i32) -> i64 ++! CHECK: omp.target ++! CHECK-SAME: host_eval(%[[HOST_LB]] -> %[[LB:[[:alnum:]]+]], %[[HOST_N]] -> %[[UB:[[:alnum:]]+]], %[[HOST_STEP]] -> %[[STEP:[[:alnum:]]+]] : i64, i64, i64) ++! CHECK: omp.teams ++! CHECK: omp.parallel ++! CHECK: omp.distribute ++! CHECK-NEXT: omp.wsloop ++! CHECK-NEXT: omp.loop_nest ({{.*}}) : i64 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + -+ ! ---------------------------------------------------------------------------- -+ ! TEAMS DISTRIBUTE ++module Test ++ use, intrinsic :: ISO_Fortran_env, only: REAL64,INT64 ++ implicit none ++ integer(kind=INT64) :: N ++ real(kind=REAL64), allocatable :: A(:) ++ ++ contains ++ subroutine init_arrays(initA) ++ implicit none ++ real(kind=REAL64), intent(in) :: initA ++ integer(kind=INT64) :: i ++ !$omp target teams distribute parallel do ++ do i = 1, N ++ A(i) = initA ++ end do ++ end subroutine init_arrays ++ ++end module Test +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/function-filtering-2.f90 llvm-project-aso/flang/test/Lower/OpenMP/function-filtering-2.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/function-filtering-2.f90 2024-10-18 14:35:01.031241608 -0500 ++++ llvm-project-aso/flang/test/Lower/OpenMP/function-filtering-2.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -1,9 +1,9 @@ + ! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM,LLVM-HOST %s + ! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -emit-hlfir %s -o - | FileCheck --check-prefix=MLIR %s +-! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM,LLVM-DEVICE %s +-! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefix=MLIR %s ++! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM,LLVM-DEVICE %s ++! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefix=MLIR %s + ! RUN: bbc -fopenmp -fopenmp-version=52 -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-HOST,MLIR-ALL %s +-! RUN: bbc -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s ++! RUN: bbc -target amdgcn-amd-amdhsa -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s + + ! MLIR: func.func @{{.*}}implicit_invocation() attributes {omp.declare_target = #omp.declaretarget} + ! 
MLIR: return +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/function-filtering-3.f90 llvm-project-aso/flang/test/Lower/OpenMP/function-filtering-3.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/function-filtering-3.f90 2024-08-27 20:36:25.292172480 -0500 ++++ llvm-project-aso/flang/test/Lower/OpenMP/function-filtering-3.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -1,9 +1,9 @@ + ! RUN: %flang_fc1 -fopenmp -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-HOST,LLVM-ALL %s + ! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-HOST,MLIR-ALL %s +-! RUN: %flang_fc1 -fopenmp -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-DEVICE,LLVM-ALL %s +-! RUN: %flang_fc1 -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s ++! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-DEVICE,LLVM-ALL %s ++! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s + ! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-HOST,MLIR-ALL %s +-! RUN: bbc -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s ++! RUN: bbc -fopenmp -target amdgcn-amd-amdhsa -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s + + ! Check that the correct LLVM IR functions are kept for the host and device + ! after running the whole set of translation and transformation passes from +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/function-filtering.f90 llvm-project-aso/flang/test/Lower/OpenMP/function-filtering.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/function-filtering.f90 2024-10-18 14:35:01.031241608 -0500 ++++ llvm-project-aso/flang/test/Lower/OpenMP/function-filtering.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -1,9 +1,9 @@ + ! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-HOST,LLVM-ALL %s + ! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-HOST,MLIR-ALL %s +-! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-DEVICE,LLVM-ALL %s +-! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s ++! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-DEVICE,LLVM-ALL %s ++! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s + ! RUN: bbc -fopenmp -fopenmp-version=52 -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-HOST,MLIR-ALL %s +-! RUN: bbc -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s ++! 
RUN: bbc -target amdgcn-amd-amdhsa -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s + + ! Check that the correct LLVM IR functions are kept for the host and device + ! after running the whole set of translation and transformation passes from +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/hlfir-to-fir-conv-omp.mlir llvm-project-aso/flang/test/Lower/OpenMP/hlfir-to-fir-conv-omp.mlir +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/hlfir-to-fir-conv-omp.mlir 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Lower/OpenMP/hlfir-to-fir-conv-omp.mlir 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,64 @@ ++// Tests HLFIR-to-FIR conversion aspects relevant to OpenMP. For example, that ++// the correct alloca block is chosen for OMP regions. ++ ++// RUN: fir-opt --convert-hlfir-to-fir %s -o - | \ ++// RUN: FileCheck %s ++ ++fir.global internal @_QQro.1xi4.0(dense<42> : tensor<1xi32>) constant : !fir.array<1xi32> ++ ++func.func @_QPfoo() { ++ %c1 = arith.constant 1 : index ++ %host_alloc = fir.alloca !fir.array<1xi32> {bindc_name = "arr", uniq_name = "_QFfooEarr"} ++ ++ %1 = fir.shape %c1 : (index) -> !fir.shape<1> ++ %host_decl:2 = hlfir.declare %host_alloc(%1) {uniq_name = "_QFfooEarr"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) ++ %map_info = omp.map.info var_ptr(%host_decl#1 : !fir.ref>, !fir.array<1xi32>) map_clauses(implicit, tofrom) capture(ByRef) -> !fir.ref> {name = "arr"} ++ ++ // CHECK: omp.target ++ omp.target map_entries(%map_info -> %arg1 : !fir.ref>) { ++ %c1_2 = arith.constant 1 : index ++ %21 = fir.shape %c1_2 : (index) -> !fir.shape<1> ++ ++ // CHECK: %[[TARGET_DECL:.*]] = fir.declare ++ %target_decl:2 = hlfir.declare %arg1(%21) {uniq_name = "_QFfooEarr"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) ++ ++ // CHECK: omp.teams ++ omp.teams { ++ %c1_3 = arith.constant 1 : i32 ++ %c10 = arith.constant 10 : i32 ++ ++ // CHECK: omp.parallel ++ omp.parallel { ++ // CHECK: %[[TO_BOX_ALLOC:.*]] = fir.alloca !fir.box> {pinned} ++ // CHECK: omp.distribute ++ omp.distribute { ++ // CHECK: omp.wsloop ++ omp.wsloop { ++ // CHECK: omp.loop_nest ++ omp.loop_nest (%arg2) : i32 = (%c1_3) to (%c10) inclusive step (%c1_3) { ++ %25 = fir.address_of(@_QQro.1xi4.0) : !fir.ref> ++ %26 = fir.shape %c1_2 : (index) -> !fir.shape<1> ++ %27:2 = hlfir.declare %25(%26) {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro.1xi4.0"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) ++ ++ ++ // CHECK: %[[EMBOX:.*]] = fir.embox %[[TARGET_DECL]] ++ // CHECK: fir.store %[[EMBOX]] to %[[TO_BOX_ALLOC]] ++ // CHECK: %[[BOX_ALLOC_CONV:.*]] = fir.convert %[[TO_BOX_ALLOC]] : (!fir.ref>>) -> !fir.ref> ++ // CHECK: fir.call @_FortranAAssign(%[[BOX_ALLOC_CONV]], {{.*}}) ++ hlfir.assign %27#0 to %target_decl#0 : !fir.ref>, !fir.ref> ++ // CHECK: omp.yield ++ omp.yield ++ } ++ } {omp.composite} ++ } {omp.composite} ++ // CHECK: omp.terminator ++ omp.terminator ++ } {omp.composite} ++ // CHECK: omp.terminator ++ omp.terminator ++ } ++ // CHECK: omp.terminator ++ omp.terminator ++ } ++ return ++} +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/if-clause.f90 llvm-project-aso/flang/test/Lower/OpenMP/if-clause.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/if-clause.f90 2024-09-13 09:46:38.870303386 -0500 ++++ llvm-project-aso/flang/test/Lower/OpenMP/if-clause.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -11,7 +11,6 @@ + ! 
TODO When they are supported, add tests for: + ! - PARALLEL SECTIONS + ! - PARALLEL WORKSHARE +- ! - TARGET UPDATE + ! - TASKLOOP + ! - TASKLOOP SIMD + +@@ -1225,6 +1224,22 @@ + !$omp end target teams + + ! ---------------------------------------------------------------------------- ++ ! TARGET UPDATE + ! ---------------------------------------------------------------------------- + -+ ! CHECK: omp.teams -+ ! CHECK: omp.distribute -+ !$omp teams distribute -+ do i = 1, 10 -+ end do -+ !$omp end teams distribute ++ ! CHECK: omp.target_update ++ ! CHECK-NOT: if({{.*}}) ++ !$omp target update to(i) + - end program main -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/loop-lifetime.f90 llvm-project/flang/test/Lower/OpenMP/loop-lifetime.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/loop-lifetime.f90 1969-12-31 18:00:00.000000000 -0600 -+++ llvm-project/flang/test/Lower/OpenMP/loop-lifetime.f90 2024-06-12 10:44:09.351614239 -0500 ++ ! CHECK: omp.target_update ++ ! CHECK-SAME: if({{.*}}) ++ !$omp target update to(i) if(.true.) ++ ++ ! CHECK: omp.target_update ++ ! CHECK-SAME: if({{.*}}) ++ !$omp target update to(i) if(target update: .true.) ++ ++ ! ---------------------------------------------------------------------------- + ! TASK + ! ---------------------------------------------------------------------------- + ! CHECK: omp.task +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/loop-lifetime.f90 llvm-project-aso/flang/test/Lower/OpenMP/loop-lifetime.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/loop-lifetime.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Lower/OpenMP/loop-lifetime.f90 2024-11-23 20:39:47.192175322 -0600 @@ -0,0 +1,91 @@ +! This test checks the insertion of lifetime information for loop indices of +! OpenMP loop operations. @@ -9225,12 +9128,12 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/loop-lif + +! CHECK-LABEL: define void @wsloop_i32 +subroutine wsloop_i32() ++ ! CHECK: %[[I_PRIV:.*]] = alloca i32 ++ ! CHECK: %[[I:.*]] = alloca i32 + ! CHECK: %[[LASTITER:.*]] = alloca i32 + ! CHECK: %[[LB:.*]] = alloca i32 + ! CHECK: %[[UB:.*]] = alloca i32 + ! CHECK: %[[STRIDE:.*]] = alloca i32 -+ ! CHECK: %[[I:.*]] = alloca i32 -+ ! CHECK: %[[I_PRIV:.*]] = alloca i32 + integer :: i + + ! CHECK: call void @llvm.lifetime.start.p0(i64 4, ptr %[[I_PRIV]]) @@ -9249,12 +9152,12 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/loop-lif + +! CHECK-LABEL: define void @wsloop_i64 +subroutine wsloop_i64() ++ ! CHECK-DAG: %[[I_PRIV:.*]] = alloca i64 ++ ! CHECK-DAG: %[[I:.*]] = alloca i64 + ! CHECK-DAG: %[[LASTITER:.*]] = alloca i32 + ! CHECK-DAG: %[[LB:.*]] = alloca i64 + ! CHECK-DAG: %[[UB:.*]] = alloca i64 + ! CHECK-DAG: %[[STRIDE:.*]] = alloca i64 -+ ! CHECK-DAG: %[[I:.*]] = alloca i64 -+ ! CHECK-DAG: %[[I_PRIV:.*]] = alloca i64 + integer*8 :: i + + ! CHECK: call void @llvm.lifetime.start.p0(i64 8, ptr %[[I_PRIV]]) @@ -9273,8 +9176,8 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/loop-lif + +! CHECK-LABEL: define void @simdloop_i32 +subroutine simdloop_i32() -+ ! CHECK: %[[I:.*]] = alloca i32 + ! CHECK: %[[I_PRIV:.*]] = alloca i32 ++ ! CHECK: %[[I:.*]] = alloca i32 + integer :: i + + ! CHECK: call void @llvm.lifetime.start.p0(i64 4, ptr %[[I_PRIV]]) @@ -9293,8 +9196,8 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/loop-lif + +! CHECK-LABEL: define void @simdloop_i64 +subroutine simdloop_i64() -+ ! 
CHECK: %[[I:.*]] = alloca i64 + ! CHECK: %[[I_PRIV:.*]] = alloca i64 ++ ! CHECK: %[[I:.*]] = alloca i64 + integer*8 :: i + + ! CHECK: call void @llvm.lifetime.start.p0(i64 8, ptr %[[I_PRIV]]) @@ -9310,73 +9213,45 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/loop-lif + end do + !$omp end simd +end subroutine -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/omp-do-simd-safelen.f90 llvm-project/flang/test/Lower/OpenMP/omp-do-simd-safelen.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/omp-do-simd-safelen.f90 1969-12-31 18:00:00.000000000 -0600 -+++ llvm-project/flang/test/Lower/OpenMP/omp-do-simd-safelen.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -0,0 +1,16 @@ -+! This test checks lowering of OpenMP do simd safelen() pragma -+ -+! RUN: bbc -emit-hlfir -fopenmp -o - %s | FileCheck %s -+! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s | FileCheck %s -+subroutine testDoSimdSafelen(int_array) -+ integer :: int_array(*) -+ -+ !CHECK: omp.wsloop { -+ !CHECK: omp.simd safelen(4) { -+ !CHECK: omp.loop_nest {{.*}} { -+ !$omp do simd safelen(4) -+ do index_ = 1, 10 -+ end do -+ !$omp end do simd -+ -+end subroutine testDoSimdSafelen -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/omp-do-simd-simdlen.f90 llvm-project/flang/test/Lower/OpenMP/omp-do-simd-simdlen.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/omp-do-simd-simdlen.f90 1969-12-31 18:00:00.000000000 -0600 -+++ llvm-project/flang/test/Lower/OpenMP/omp-do-simd-simdlen.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -0,0 +1,16 @@ -+! This test checks lowering of OpenMP do simd simdlen() pragma -+ -+! RUN: bbc -emit-hlfir -fopenmp -o - %s | FileCheck %s -+! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s | FileCheck %s -+subroutine testDoSimdSimdlen(int_array) -+ integer :: int_array(*) -+ -+ !CHECK: omp.wsloop { -+ !CHECK: omp.simd simdlen(4) { -+ !CHECK: omp.loop_nest {{.*}} { -+ !$omp do simd simdlen(4) -+ do index_ = 1, 10 -+ end do -+ !$omp end do simd -+ -+end subroutine testDoSimdSimdlen -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/parallel-reduction3.f90 llvm-project/flang/test/Lower/OpenMP/parallel-reduction3.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/parallel-reduction3.f90 2024-06-12 10:43:12.676209906 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/parallel-reduction3.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -69,13 +69,13 @@ - ! CHECK: omp.parallel { - ! CHECK: %[[VAL_14:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}} - ! CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_14]] {uniq_name = "_QFsEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) --! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 --! CHECK: %[[VAL_17:.*]] = arith.constant 100 : i32 --! CHECK: %[[VAL_18:.*]] = arith.constant 1 : i32 --! CHECK: %[[VAL_19:.*]] = fir.alloca !fir.box> --! CHECK: fir.store %[[VAL_12]]#0 to %[[VAL_19]] : !fir.ref>> --! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_Uxi32 %[[VAL_19]] -> %[[VAL_20:.*]] : !fir.ref>>) { --! CHECK-NEXT: omp.loop_nest (%[[VAL_21:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) { -+! CHECK: %[[VAL_16:.*]] = fir.alloca !fir.box> -+! CHECK: fir.store %[[VAL_12]]#0 to %[[VAL_16]] : !fir.ref>> -+! CHECK: %[[VAL_17:.*]] = arith.constant 1 : i32 -+! CHECK: %[[VAL_18:.*]] = arith.constant 100 : i32 -+! CHECK: %[[VAL_19:.*]] = arith.constant 1 : i32 -+! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_Uxi32 %[[VAL_16]] -> %[[VAL_20:.*]] : !fir.ref>>) { -+! 
CHECK-NEXT: omp.loop_nest (%[[VAL_21:.*]]) : i32 = (%[[VAL_17]]) to (%[[VAL_18]]) inclusive step (%[[VAL_19]]) { - ! CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFsEc"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) - ! CHECK: fir.store %[[VAL_21]] to %[[VAL_15]]#1 : !fir.ref - ! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref>> -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/reduction_var_map.f90 llvm-project/flang/test/Lower/OpenMP/reduction_var_map.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/reduction_var_map.f90 1969-12-31 18:00:00.000000000 -0600 -+++ llvm-project/flang/test/Lower/OpenMP/reduction_var_map.f90 2024-06-12 10:44:09.351614239 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/reduction-target-spmd.f90 llvm-project-aso/flang/test/Lower/OpenMP/reduction-target-spmd.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/reduction-target-spmd.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Lower/OpenMP/reduction-target-spmd.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,15 @@ ++! RUN: %flang_fc1 -emit-fir -fopenmp -o - %s | FileCheck %s ++! RUN: bbc -emit-fir -fopenmp -o - %s | FileCheck %s ++ ++! CHECK: omp.teams ++! CHECK-SAME: reduction(@add_reduction_i32 %{{.*}} -> %{{.*}} : !fir.ref) ++subroutine myfun() ++ integer :: i, j ++ i = 0 ++ j = 0 ++ !$omp target teams distribute parallel do reduction(+:i) ++ do j = 1,5 ++ i = i + j ++ end do ++ !$omp end target teams distribute parallel do ++end subroutine myfun +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/reduction-teams.f90 llvm-project-aso/flang/test/Lower/OpenMP/reduction-teams.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/reduction-teams.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Lower/OpenMP/reduction-teams.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,13 @@ ++! RUN: bbc -emit-fir -fopenmp -o - %s | FileCheck %s ++! RUN: %flang_fc1 -emit-fir -fopenmp -o - %s | FileCheck %s ++ ++! CHECK: omp.teams ++! CHECK-SAME: reduction ++subroutine reduction_teams() ++ integer :: i ++ i = 0 ++ ++ !$omp teams reduction(+:i) ++ i = i + 1 ++ !$omp end teams ++end subroutine reduction_teams +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/reduction_var_map.f90 llvm-project-aso/flang/test/Lower/OpenMP/reduction_var_map.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/reduction_var_map.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Lower/OpenMP/reduction_var_map.f90 2024-11-23 20:39:47.192175322 -0600 @@ -0,0 +1,43 @@ +!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s + @@ -9421,24 +9296,41 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/reductio +end subroutine omp_target_team_separate +!CHECK-LABEL: func.func @_QPomp_target_team_separate() { +!CHECK: omp.map.info var_ptr({{.*}} : !fir.ref, i64) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref {name = "s3"} -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/simd.f90 llvm-project/flang/test/Lower/OpenMP/simd.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/simd.f90 2024-06-12 10:43:12.676209906 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/simd.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -27,10 +27,10 @@ - ! 
CHECK: %[[ARG_N:.*]]:2 = hlfir.declare %{{.*}} dummy_scope %{{[0-9]+}} {uniq_name = "_QFsimd_with_if_clauseEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) - integer :: i, n, threshold - !$OMP SIMD IF( n .GE. threshold ) -+ ! CHECK: %[[COND:.*]] = arith.cmpi sge - ! CHECK: %[[LB:.*]] = arith.constant 1 : i32 - ! CHECK: %[[UB:.*]] = fir.load %[[ARG_N]]#0 - ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 -- ! CHECK: %[[COND:.*]] = arith.cmpi sge - ! CHECK: omp.simd if(%[[COND:.*]]) { - ! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { - do i = 1, n -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/target.f90 llvm-project/flang/test/Lower/OpenMP/target.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/target.f90 2024-06-12 10:43:12.676209906 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/target.f90 2024-06-12 10:44:09.351614239 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/rtl-flags.f90 llvm-project-aso/flang/test/Lower/OpenMP/rtl-flags.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/rtl-flags.f90 2024-08-27 20:36:25.296172440 -0500 ++++ llvm-project-aso/flang/test/Lower/OpenMP/rtl-flags.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -20,7 +20,7 @@ + !RUN: bbc -emit-hlfir -fopenmp -fopenmp-assume-no-nested-parallelism -fopenmp-is-target-device -o - %s | FileCheck %s --check-prefix=NEST-PAR-DEVICE-FIR + !RUN: bbc -emit-hlfir -fopenmp -fopenmp-target-debug=1 -fopenmp-assume-teams-oversubscription -fopenmp-assume-no-nested-parallelism -fopenmp-assume-threads-oversubscription -fopenmp-assume-no-thread-state -fopenmp-is-target-device -o - %s | FileCheck %s --check-prefix=ALL-DEVICE-FIR + +-!DEFAULT-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags ++!DEFAULT-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags + !DEFAULT-DEVICE-FIR-SAME: omp.is_target_device = true + !DEFAULT-DEVICE-FIR-VERSION: module attributes {{{.*}}omp.flags = #omp.flags + !DEFAULT-DEVICE-FIR-VERSION-SAME: omp.is_target_device = true +@@ -28,12 +28,12 @@ + !DEFAULT-HOST-FIR: module attributes {{{.*}}omp.is_target_device = false{{.*}} + !DEFAULT-HOST-FIR-VERSION: module attributes {{{.*}}omp.is_target_device = false + !DEFAULT-HOST-FIR-VERSION-SAME: omp.version = #omp.version +-!DBG-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags +-!DBG-EQ-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags +-!TEAMS-OSUB-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags +-!THREAD-OSUB-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags +-!THREAD-STATE-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags +-!NEST-PAR-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags +-!ALL-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags ++!DBG-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags ++!DBG-EQ-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags ++!TEAMS-OSUB-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags ++!THREAD-OSUB-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags ++!THREAD-STATE-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags ++!NEST-PAR-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags ++!ALL-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags + subroutine omp_subroutine() + end subroutine omp_subroutine +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/target.f90 llvm-project-aso/flang/test/Lower/OpenMP/target.f90 +--- 
llvm-project-aso-orig/flang/test/Lower/OpenMP/target.f90 2024-11-23 20:25:26.851275134 -0600 ++++ llvm-project-aso/flang/test/Lower/OpenMP/target.f90 2024-11-23 20:39:47.192175322 -0600 @@ -45,16 +45,16 @@ integer :: b(1024) integer :: c(1024) @@ -9497,31 +9389,10 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/target.f end subroutine omp_target_exit_mt !=============================================================================== -@@ -320,13 +320,13 @@ - !CHECK: omp.terminator - !$omp end target data - !CHECK: } -- !CHECK: %[[BOUNDS_B:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) -- !CHECK: %[[MAP_B:.*]] = omp.map.info var_ptr(%[[VAR_B_DECL]]#0 : !fir.ref>, !fir.array<1024xi32>) map_clauses(always, from) capture(ByRef) bounds(%[[BOUNDS_B]]) -> !fir.ref> {name = "b"} -- !CHECK: omp.target_data map_entries(%[[MAP_B]] : !fir.ref>) { -- !$omp target data map(always, from : b) -- !CHECK: omp.terminator -- !$omp end target data -- !CHECK: } -+ !CHECK %[[BOUNDS_B:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) -+ !CHECK %[[MAP_B:.*]] = omp.map.info var_ptr(%[[VAR_B_DECL]]#0 : !fir.ref>, !fir.array<1024xi32>) map_clauses(always, from) capture(ByRef) bounds(%[[BOUNDS_B]]) -> !fir.ref> {name = "b"} -+ !CHECK omp.target_data map_entries(%[[MAP_B]] : !fir.ref>) { -+ !!$omp target data map(always, from : b) -+ !CHECK omp.terminator -+ !!$omp end target data -+ !CHECK } - end subroutine omp_target_data_mt - - !=============================================================================== -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/target_private.f90 llvm-project/flang/test/Lower/OpenMP/target_private.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/target_private.f90 1969-12-31 18:00:00.000000000 -0600 -+++ llvm-project/flang/test/Lower/OpenMP/target_private.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -0,0 +1,76 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/target_private.f90 llvm-project-aso/flang/test/Lower/OpenMP/target_private.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/target_private.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Lower/OpenMP/target_private.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,73 @@ +!Test data-sharing attribute clauses for the `target` directive. 
+ +!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s @@ -9531,319 +9402,376 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/target_p + implicit none + integer :: x(1) + -+!$omp target private(x) -+ x(1) = 42 -+!$omp end target -+!CHECK: omp.target { -+!CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index -+!CHECK-DAG: %[[PRIV_ALLOC:.*]] = fir.alloca !fir.array<1xi32> {bindc_name = "x", -+!CHECK-SAME: pinned, uniq_name = "_QFomp_target_privateEx"} -+!CHECK-NEXT: %[[SHAPE:.*]] = fir.shape %[[C1]] : (index) -> !fir.shape<1> -+!CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]](%[[SHAPE]]) -+!CHECK-SAME: {uniq_name = "_QFomp_target_privateEx"} : -+!CHECK-SAME: (!fir.ref>, !fir.shape<1>) -> -+!CHECK-SAME: (!fir.ref>, !fir.ref>) -+!CHECK-DAG: %[[C42:.*]] = arith.constant 42 : i32 -+!CHECK-DAG: %[[C1_2:.*]] = arith.constant 1 : index -+!CHECK-NEXT: %[[PRIV_BINDING:.*]] = hlfir.designate %[[PRIV_DECL]]#0 (%[[C1_2]]) -+!CHECK-SAME: : (!fir.ref>, index) -> !fir.ref -+!CHECK-NEXT: hlfir.assign %[[C42]] to %[[PRIV_BINDING]] : i32, !fir.ref -+!CHECK-NEXT: omp.terminator -+!CHECK-NEXT: } ++!$omp target private(x) ++ x(1) = 42 ++!$omp end target ++!CHECK: omp.target { ++!CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index ++!CHECK-DAG: %[[PRIV_ALLOC:.*]] = fir.alloca !fir.array<1xi32> {bindc_name = "x", ++!CHECK-SAME: pinned, uniq_name = "_QFomp_target_privateEx"} ++!CHECK-NEXT: %[[SHAPE:.*]] = fir.shape %[[C1]] : (index) -> !fir.shape<1> ++!CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]](%[[SHAPE]]) ++!CHECK-SAME: {uniq_name = "_QFomp_target_privateEx"} : ++!CHECK-SAME: (!fir.ref>, !fir.shape<1>) -> ++!CHECK-SAME: (!fir.ref>, !fir.ref>) ++!CHECK-DAG: %[[C42:.*]] = arith.constant 42 : i32 ++!CHECK-DAG: %[[C1_2:.*]] = arith.constant 1 : index ++!CHECK-NEXT: %[[PRIV_BINDING:.*]] = hlfir.designate %[[PRIV_DECL]]#0 (%[[C1_2]]) ++!CHECK-SAME: : (!fir.ref>, index) -> !fir.ref ++!CHECK-NEXT: hlfir.assign %[[C42]] to %[[PRIV_BINDING]] : i32, !fir.ref ++!CHECK-NEXT: omp.terminator ++!CHECK-NEXT: } ++ ++end subroutine omp_target_private ++ ++!CHECK-LABEL: func.func @_QPomp_target_target_do_simd() ++subroutine omp_target_target_do_simd() ++ implicit none ++ ++ real(8) :: var ++ integer(8) :: iv ++ ++!$omp target teams distribute parallel do simd private(iv,var) ++ do iv=0,10 ++ var = 3.14 ++ end do ++!$omp end target teams distribute parallel do simd ++ ++!CHECK: %[[IV:.*]] = omp.map.info{{.*}}map_clauses(implicit{{.*}}{name = "iv"} ++!CHECK: %[[VAR:.*]] = omp.map.info{{.*}}map_clauses(implicit{{.*}}{name = "var"} ++!CHECK: omp.target ++!CHECK-SAME: map_entries(%[[IV]] -> %[[MAP_IV:.*]], %[[VAR]] -> %[[MAP_VAR:.*]] : !fir.ref, !fir.ref) ++!CHECK: %[[MAP_IV_DECL:.*]]:2 = hlfir.declare %[[MAP_IV]] ++!CHECK: %[[MAP_VAR_DECL:.*]]:2 = hlfir.declare %[[MAP_VAR]] ++!CHECK: omp.teams { ++!CHECK: omp.parallel private(@{{.*}} %[[MAP_IV_DECL]]#0 -> %[[IV_PRIV:.*]], @{{.*}} %[[MAP_VAR_DECL]]#0 -> %[[VAR_PRIV:.*]] : !fir.ref, !fir.ref) { ++!CHECK: %[[IV_DECL:.*]]:2 = hlfir.declare %[[IV_PRIV]] ++!CHECK: %[[VAR_DECL:.*]]:2 = hlfir.declare %[[VAR_PRIV]] ++!CHECK: omp.distribute { ++!CHECK-NEXT: omp.wsloop { ++!CHECK-NEXT: omp.simd { ++!CHECK-NEXT: omp.loop_nest ++!CHECK: fir.store {{.*}} to %[[IV_DECL]]#1 ++!CHECK: hlfir.assign {{.*}} to %[[VAR_DECL]]#0 ++!CHECK: omp.yield ++!CHECK-NEXT: } ++!CHECK-NEXT: } {omp.composite} ++!CHECK-NEXT: } {omp.composite} ++!CHECK-NEXT: } {omp.composite} ++!CHECK-NEXT: omp.terminator ++!CHECK-NEXT: } ++!CHECK-NEXT: omp.terminator 
++!CHECK-NEXT: } ++!CHECK-NEXT: omp.terminator ++!CHECK-NEXT: } ++ ++end subroutine omp_target_target_do_simd +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/target-spmd.f90 llvm-project-aso/flang/test/Lower/OpenMP/target-spmd.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/target-spmd.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Lower/OpenMP/target-spmd.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,191 @@ ++! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s ++ ++! CHECK-LABEL: func.func @_QPdistribute_parallel_do_generic() { ++subroutine distribute_parallel_do_generic() ++ ! CHECK: omp.target ++ ! CHECK-NOT: host_eval({{.*}}) ++ ! CHECK-SAME: { ++ !$omp target ++ !$omp teams ++ !$omp distribute parallel do ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end distribute parallel do ++ call bar() ++ !$omp end teams ++ !$omp end target ++ ++ ! CHECK: omp.target ++ ! CHECK-NOT: host_eval({{.*}}) ++ ! CHECK-SAME: { ++ !$omp target teams ++ !$omp distribute parallel do ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end distribute parallel do ++ call bar() ++ !$omp end target teams ++ ++ ! CHECK: omp.target ++ ! CHECK-NOT: host_eval({{.*}}) ++ ! CHECK-SAME: { ++ !$omp target teams ++ !$omp distribute parallel do ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end distribute parallel do ++ ++ !$omp distribute parallel do ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end distribute parallel do ++ !$omp end target teams ++end subroutine distribute_parallel_do_generic ++ ++! CHECK-LABEL: func.func @_QPdistribute_parallel_do_spmd() { ++subroutine distribute_parallel_do_spmd() ++ ! CHECK: omp.target ++ ! CHECK-SAME: host_eval({{.*}}) ++ !$omp target ++ !$omp teams ++ !$omp distribute parallel do ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end distribute parallel do ++ !$omp end teams ++ !$omp end target ++ ++ ! CHECK: omp.target ++ ! CHECK-SAME: host_eval({{.*}}) ++ !$omp target teams ++ !$omp distribute parallel do ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end distribute parallel do ++ !$omp end target teams ++end subroutine distribute_parallel_do_spmd ++ ++! CHECK-LABEL: func.func @_QPdistribute_parallel_do_simd_generic() { ++subroutine distribute_parallel_do_simd_generic() ++ ! CHECK: omp.target ++ ! CHECK-NOT: host_eval({{.*}}) ++ ! CHECK-SAME: { ++ !$omp target ++ !$omp teams ++ !$omp distribute parallel do simd ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end distribute parallel do simd ++ call bar() ++ !$omp end teams ++ !$omp end target ++ ++ ! CHECK: omp.target ++ ! CHECK-NOT: host_eval({{.*}}) ++ ! CHECK-SAME: { ++ !$omp target teams ++ !$omp distribute parallel do simd ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end distribute parallel do simd ++ call bar() ++ !$omp end target teams ++ ++ ! CHECK: omp.target ++ ! CHECK-NOT: host_eval({{.*}}) ++ ! CHECK-SAME: { ++ !$omp target teams ++ !$omp distribute parallel do simd ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end distribute parallel do simd ++ ++ !$omp distribute parallel do simd ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end distribute parallel do simd ++ !$omp end target teams ++end subroutine distribute_parallel_do_simd_generic + -+end subroutine omp_target_private ++! CHECK-LABEL: func.func @_QPdistribute_parallel_do_simd_spmd() { ++subroutine distribute_parallel_do_simd_spmd() ++ ! CHECK: omp.target ++ ! 
CHECK-SAME: host_eval({{.*}}) ++ !$omp target ++ !$omp teams ++ !$omp distribute parallel do simd ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end distribute parallel do simd ++ !$omp end teams ++ !$omp end target + -+!CHECK-LABEL: func.func @_QPomp_target_target_do_simd() -+subroutine omp_target_target_do_simd() -+ implicit none ++ ! CHECK: omp.target ++ ! CHECK-SAME: host_eval({{.*}}) ++ !$omp target teams ++ !$omp distribute parallel do simd ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end distribute parallel do simd ++ !$omp end target teams ++end subroutine distribute_parallel_do_simd_spmd + -+ real(8) :: var -+ integer(8) :: iv ++! CHECK-LABEL: func.func @_QPteams_distribute_parallel_do_spmd() { ++subroutine teams_distribute_parallel_do_spmd() ++ ! CHECK: omp.target ++ ! CHECK-SAME: host_eval({{.*}}) ++ !$omp target ++ !$omp teams distribute parallel do ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end teams distribute parallel do ++ !$omp end target ++end subroutine teams_distribute_parallel_do_spmd + -+!$omp target teams distribute parallel do simd private(iv,var) -+ do iv=0,10 -+ var = 3.14 -+ end do -+!$omp end target teams distribute parallel do simd ++! CHECK-LABEL: func.func @_QPteams_distribute_parallel_do_simd_spmd() { ++subroutine teams_distribute_parallel_do_simd_spmd() ++ ! CHECK: omp.target ++ ! CHECK-SAME: host_eval({{.*}}) ++ !$omp target ++ !$omp teams distribute parallel do simd ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end teams distribute parallel do simd ++ !$omp end target ++end subroutine teams_distribute_parallel_do_simd_spmd + -+!CHECK: %[[IV:.*]] = omp.map.info{{.*}}map_clauses(implicit{{.*}}{name = "iv"} -+!CHECK: %[[VAR:.*]] = omp.map.info{{.*}}map_clauses(implicit{{.*}}{name = "var"} -+!CHECK: omp.target -+!CHECK-SAME: map_entries(%[[IV]] -> %{{.*}}, %[[VAR]] -> %{{.*}} -+!CHECK: omp.teams { -+!CHECK: %[[IV_PRIV:.*]] = fir.alloca i64 {bindc_name = "iv" -+!CHECK: %[[IV_DECL:.*]]:2 = hlfir.declare %[[IV_PRIV]] -+!CHECK: %[[VAR_PRIV:.*]] = fir.alloca f64 {bindc_name = "var" -+!CHECK: %[[VAR_DECL:.*]]:2 = hlfir.declare %[[VAR_PRIV]] -+!CHECK: omp.distribute { -+!CHECK-NEXT: omp.parallel { -+!CHECK-NEXT: omp.wsloop { -+!CHECK-NEXT: omp.simd { -+!CHECK-NEXT: omp.loop_nest -+!CHECK: fir.store {{.*}} to %[[IV_DECL]]#1 -+!CHECK: hlfir.assign {{.*}} to %[[VAR_DECL]]#0 -+!CHECK: omp.yield -+!CHECK-NEXT: } -+!CHECK-NEXT: omp.terminator -+!CHECK-NEXT: } -+!CHECK-NEXT: omp.terminator -+!CHECK-NEXT: } -+!CHECK-NEXT: omp.terminator -+!CHECK-NEXT: } -+!CHECK-NEXT: omp.terminator -+!CHECK-NEXT: } -+!CHECK-NEXT: omp.terminator -+!CHECK-NEXT: } -+!CHECK-NEXT: omp.terminator -+!CHECK-NEXT: } ++! CHECK-LABEL: func.func @_QPtarget_teams_distribute_parallel_do_spmd() { ++subroutine target_teams_distribute_parallel_do_spmd() ++ ! CHECK: omp.target ++ ! CHECK-SAME: host_eval({{.*}}) ++ !$omp target teams distribute parallel do ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end target teams distribute parallel do ++end subroutine target_teams_distribute_parallel_do_spmd + -+end subroutine omp_target_target_do_simd -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/Todo/omp-do-simd-aligned.f90 llvm-project/flang/test/Lower/OpenMP/Todo/omp-do-simd-aligned.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/Todo/omp-do-simd-aligned.f90 2024-06-12 10:43:12.676209906 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/Todo/omp-do-simd-aligned.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -1,11 +1,11 @@ - ! 
This test checks lowering of OpenMP do simd aligned() pragma - --! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s --! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s -+! RUN: %not_todo_cmd bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s -+! RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s - subroutine testDoSimdAligned(int_array) - use iso_c_binding - type(c_ptr) :: int_array --!CHECK: not yet implemented: Unhandled clause ALIGNED in DO SIMD construct -+!CHECK: not yet implemented: Unhandled clause ALIGNED in SIMD construct - !$omp do simd aligned(int_array) - do index_ = 1, 10 - call c_test_call(int_array) -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90 llvm-project/flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90 2024-06-12 10:43:12.676209906 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -4,7 +4,7 @@ - ! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s - subroutine testDoSimdLinear(int_array) - integer :: int_array(*) --!CHECK: not yet implemented: Unhandled clause LINEAR in DO SIMD construct -+!CHECK: not yet implemented: Unhandled clause LINEAR in DO construct - !$omp do simd linear(int_array) - do index_ = 1, 10 - end do -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/Todo/omp-do-simd-safelen.f90 llvm-project/flang/test/Lower/OpenMP/Todo/omp-do-simd-safelen.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/Todo/omp-do-simd-safelen.f90 2024-06-12 10:43:12.676209906 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/Todo/omp-do-simd-safelen.f90 1969-12-31 18:00:00.000000000 -0600 -@@ -1,14 +0,0 @@ --! This test checks lowering of OpenMP do simd safelen() pragma -- --! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s --! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s --subroutine testDoSimdSafelen(int_array) -- integer :: int_array(*) --!CHECK: not yet implemented: Unhandled clause SAFELEN in DO SIMD construct --!$omp do simd safelen(4) -- do index_ = 1, 10 -- end do --!$omp end do simd -- --end subroutine testDoSimdSafelen -- -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/Todo/omp-do-simd-simdlen.f90 llvm-project/flang/test/Lower/OpenMP/Todo/omp-do-simd-simdlen.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/Todo/omp-do-simd-simdlen.f90 2024-06-12 10:43:12.676209906 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/Todo/omp-do-simd-simdlen.f90 1969-12-31 18:00:00.000000000 -0600 -@@ -1,14 +0,0 @@ --! This test checks lowering of OpenMP do simd simdlen() pragma -- ++! CHECK-LABEL: func.func @_QPtarget_teams_distribute_parallel_do_simd_spmd() { ++subroutine target_teams_distribute_parallel_do_simd_spmd() ++ ! CHECK: omp.target ++ ! 
CHECK-SAME: host_eval({{.*}}) ++ !$omp target teams distribute parallel do simd ++ do i = 1, 10 ++ call foo(i) ++ end do ++ !$omp end target teams distribute parallel do simd ++end subroutine target_teams_distribute_parallel_do_simd_spmd +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/Todo/reduction-teams.f90 llvm-project-aso/flang/test/Lower/OpenMP/Todo/reduction-teams.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/Todo/reduction-teams.f90 2024-08-27 20:36:25.292172480 -0500 ++++ llvm-project-aso/flang/test/Lower/OpenMP/Todo/reduction-teams.f90 1969-12-31 18:00:00.000000000 -0600 +@@ -1,12 +0,0 @@ -! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s -! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s --subroutine testDoSimdSimdlen(int_array) -- integer :: int_array(*) --!CHECK: not yet implemented: Unhandled clause SIMDLEN in DO SIMD construct --!$omp do simd simdlen(4) -- do index_ = 1, 10 -- end do --!$omp end do simd - --end subroutine testDoSimdSimdlen +-! CHECK: not yet implemented: Unhandled clause REDUCTION in TEAMS construct +-subroutine reduction_teams() +- integer :: i +- i = 0 - -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/Todo/reduction-teams.f90 llvm-project/flang/test/Lower/OpenMP/Todo/reduction-teams.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/Todo/reduction-teams.f90 2023-08-31 11:50:50.526255637 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/Todo/reduction-teams.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -1,7 +1,9 @@ --! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s --! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s -+! RUN: bbc -emit-fir -fopenmp -o - %s | FileCheck %s -+! RUN: %flang_fc1 -emit-fir -fopenmp -o - %s | FileCheck %s -+! XFAIL: * +- !$omp teams reduction(+:i) +- i = i + 1 +- !$omp end teams +-end subroutine reduction_teams +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90 llvm-project-aso/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90 +--- llvm-project-aso-orig/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90 2024-11-23 20:25:26.851275134 -0600 ++++ llvm-project-aso/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -14,9 +14,9 @@ + real, pointer :: pa(:) + type(c_ptr) :: cptr + +- !$omp target data use_device_ptr(pa, cptr, array) +- !$omp end target data +- end subroutine ++ !$omp target data use_device_ptr(pa, cptr, array) ++ !$omp end target data ++end subroutine --! CHECK: not yet implemented: Unhandled clause REDUCTION in TEAMS construct -+! CHECK: omp.teams -+! CHECK-SAME: reduction - subroutine reduction_teams() - integer :: i - i = 0 -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/wsloop-chunks.f90 llvm-project/flang/test/Lower/OpenMP/wsloop-chunks.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/wsloop-chunks.f90 2024-06-12 10:43:12.680209864 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/wsloop-chunks.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -16,12 +16,12 @@ - do i=1, 9 - print*, i - --! CHECK: %[[VAL_2:.*]] = arith.constant 1 : i32 --! CHECK: %[[VAL_3:.*]] = arith.constant 9 : i32 --! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 --! CHECK: %[[VAL_5:.*]] = arith.constant 4 : i32 --! CHECK: omp.wsloop schedule(static = %[[VAL_5]] : i32) nowait { --! 
CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) { -+! CHECK: %[[VAL_2:.*]] = arith.constant 4 : i32 -+! CHECK: %[[VAL_3:.*]] = arith.constant 1 : i32 -+! CHECK: %[[VAL_4:.*]] = arith.constant 9 : i32 -+! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 -+! CHECK: omp.wsloop schedule(static = %[[VAL_2]] : i32) nowait { -+! CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) { - ! CHECK: fir.store %[[ARG0]] to %[[STORE_IV:.*]]#1 : !fir.ref - ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[STORE_IV]]#0 : !fir.ref - ! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 -@@ -37,12 +37,12 @@ - do i=1, 9 - print*, i*2 - --! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 --! CHECK: %[[VAL_15:.*]] = arith.constant 9 : i32 --! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 --! CHECK: %[[VAL_17:.*]] = arith.constant 4 : i32 --! CHECK: omp.wsloop schedule(static = %[[VAL_17]] : i32) nowait { --! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) { -+! CHECK: %[[VAL_14:.*]] = arith.constant 4 : i32 -+! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32 -+! CHECK: %[[VAL_16:.*]] = arith.constant 9 : i32 -+! CHECK: %[[VAL_17:.*]] = arith.constant 1 : i32 -+! CHECK: omp.wsloop schedule(static = %[[VAL_14]] : i32) nowait { -+! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]]) : i32 = (%[[VAL_15]]) to (%[[VAL_16]]) inclusive step (%[[VAL_17]]) { - ! CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]]#1 : !fir.ref - ! CHECK: %[[VAL_24:.*]] = arith.constant 2 : i32 - ! CHECK: %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]]#0 : !fir.ref -@@ -64,12 +64,12 @@ - !$OMP END DO NOWAIT - ! CHECK: %[[VAL_28:.*]] = arith.constant 6 : i32 - ! CHECK: hlfir.assign %[[VAL_28]] to %[[VAL_0]]#0 : i32, !fir.ref --! CHECK: %[[VAL_29:.*]] = arith.constant 1 : i32 --! CHECK: %[[VAL_30:.*]] = arith.constant 9 : i32 --! CHECK: %[[VAL_31:.*]] = arith.constant 1 : i32 --! CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_0]]#0 : !fir.ref --! CHECK: omp.wsloop schedule(static = %[[VAL_32]] : i32) nowait { --! CHECK-NEXT: omp.loop_nest (%[[ARG2:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) { -+! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_0]]#0 : !fir.ref -+! CHECK: %[[VAL_30:.*]] = arith.constant 1 : i32 -+! CHECK: %[[VAL_31:.*]] = arith.constant 9 : i32 -+! CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32 -+! CHECK: omp.wsloop schedule(static = %[[VAL_29]] : i32) nowait { -+! CHECK-NEXT: omp.loop_nest (%[[ARG2:.*]]) : i32 = (%[[VAL_30]]) to (%[[VAL_31]]) inclusive step (%[[VAL_32]]) { - ! CHECK: fir.store %[[ARG2]] to %[[STORE_IV2:.*]]#1 : !fir.ref - ! CHECK: %[[VAL_39:.*]] = arith.constant 3 : i32 - ! CHECK: %[[LOAD_IV2:.*]] = fir.load %[[STORE_IV2]]#0 : !fir.ref -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 llvm-project/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 2024-06-12 10:43:12.680209864 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 2024-06-12 10:44:09.351614239 -0500 -@@ -73,14 +73,14 @@ - ! CHECK: omp.parallel { - ! CHECK: %[[VAL_6:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}} - ! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) --! CHECK: %[[VAL_8:.*]] = arith.constant 0 : i32 --! 
CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 --! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 --! CHECK: %[[VAL_11:.*]] = fir.embox %[[VAL_5]]#0(%[[VAL_4]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> --! CHECK: %[[VAL_12:.*]] = fir.alloca !fir.box> --! CHECK: fir.store %[[VAL_11]] to %[[VAL_12]] : !fir.ref>> --! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_2xi32 %[[VAL_12]] -> %[[VAL_13:.*]] : !fir.ref>>) { --! CHECK-NEXT: omp.loop_nest (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { -+! CHECK: %[[VAL_8:.*]] = fir.embox %[[VAL_5]]#0(%[[VAL_4]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> -+! CHECK: %[[VAL_9:.*]] = fir.alloca !fir.box> -+! CHECK: fir.store %[[VAL_8]] to %[[VAL_9]] : !fir.ref>> -+! CHECK: %[[VAL_10:.*]] = arith.constant 0 : i32 -+! CHECK: %[[VAL_11:.*]] = arith.constant 10 : i32 -+! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 -+! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_2xi32 %[[VAL_9]] -> %[[VAL_13:.*]] : !fir.ref>>) { -+! CHECK-NEXT: omp.loop_nest (%[[VAL_14:.*]]) : i32 = (%[[VAL_10]]) to (%[[VAL_11]]) inclusive step (%[[VAL_12]]) { - ! CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_13]] {uniq_name = "_QFEr"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) - ! CHECK: fir.store %[[VAL_14]] to %[[VAL_7]]#1 : !fir.ref - ! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref>> -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 llvm-project/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 2024-06-12 10:43:12.680209864 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 2024-06-12 10:44:09.355614196 -0500 -@@ -79,13 +79,13 @@ - ! CHECK: omp.parallel { - ! CHECK: %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}} - ! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFFreduceEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) --! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i32 --! CHECK: %[[VAL_7:.*]] = arith.constant 10 : i32 --! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 --! CHECK: %[[VAL_9:.*]] = fir.alloca !fir.box> --! CHECK: fir.store %[[VAL_3]]#1 to %[[VAL_9]] : !fir.ref>> --! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_Uxf64 %[[VAL_9]] -> %[[VAL_10:.*]] : !fir.ref>>) { --! CHECK-NEXT: omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { -+! CHECK: %[[VAL_6:.*]] = fir.alloca !fir.box> -+! CHECK: fir.store %[[VAL_3]]#1 to %[[VAL_6]] : !fir.ref>> -+! CHECK: %[[VAL_7:.*]] = arith.constant 0 : i32 -+! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32 -+! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -+! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_Uxf64 %[[VAL_6]] -> %[[VAL_10:.*]] : !fir.ref>>) { -+! CHECK-NEXT: omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { - ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {fortran_attrs = {{.*}}, uniq_name = "_QFFreduceEr"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) - ! CHECK: fir.store %[[VAL_11]] to %[[VAL_5]]#1 : !fir.ref - ! 
CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/wsloop-reduction-array.f90 llvm-project/flang/test/Lower/OpenMP/wsloop-reduction-array.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/wsloop-reduction-array.f90 2024-06-12 10:43:12.680209864 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/wsloop-reduction-array.f90 2024-06-12 10:44:09.355614196 -0500 -@@ -73,14 +73,14 @@ - ! CHECK: omp.parallel { - ! CHECK: %[[VAL_6:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}} - ! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) --! CHECK: %[[VAL_8:.*]] = arith.constant 0 : i32 --! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 --! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 --! CHECK: %[[VAL_11:.*]] = fir.embox %[[VAL_5]]#0(%[[VAL_4]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> --! CHECK: %[[VAL_12:.*]] = fir.alloca !fir.box> --! CHECK: fir.store %[[VAL_11]] to %[[VAL_12]] : !fir.ref>> --! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_2xi32 %[[VAL_12]] -> %[[VAL_13:.*]] : !fir.ref>>) { --! CHECK-NEXT: omp.loop_nest (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { -+! CHECK: %[[VAL_8:.*]] = fir.embox %[[VAL_5]]#0(%[[VAL_4]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> -+! CHECK: %[[VAL_9:.*]] = fir.alloca !fir.box> -+! CHECK: fir.store %[[VAL_8]] to %[[VAL_9]] : !fir.ref>> -+! CHECK: %[[VAL_10:.*]] = arith.constant 0 : i32 -+! CHECK: %[[VAL_11:.*]] = arith.constant 10 : i32 -+! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 -+! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_2xi32 %[[VAL_9]] -> %[[VAL_13:.*]] : !fir.ref>>) { -+! CHECK-NEXT: omp.loop_nest (%[[VAL_14:.*]]) : i32 = (%[[VAL_10]]) to (%[[VAL_11]]) inclusive step (%[[VAL_12]]) { - ! CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_13]] {uniq_name = "_QFEr"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) - ! CHECK: fir.store %[[VAL_14]] to %[[VAL_7]]#1 : !fir.ref - ! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Lower/OpenMP/wsloop-reduction-multiple-clauses.f90 llvm-project/flang/test/Lower/OpenMP/wsloop-reduction-multiple-clauses.f90 ---- llvm-project.orig/flang/test/Lower/OpenMP/wsloop-reduction-multiple-clauses.f90 2024-06-12 10:43:12.680209864 -0500 -+++ llvm-project/flang/test/Lower/OpenMP/wsloop-reduction-multiple-clauses.f90 2024-06-12 10:44:09.355614196 -0500 -@@ -109,14 +109,14 @@ - ! CHECK: omp.parallel { - ! CHECK: %[[VAL_11:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}} - ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) --! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i32 --! CHECK: %[[VAL_14:.*]] = arith.constant 10 : i32 -+! CHECK: %[[VAL_13:.*]] = fir.embox %[[VAL_4]]#0(%[[VAL_3]]) : (!fir.ref>, !fir.shape<2>) -> !fir.box> -+! CHECK: %[[VAL_14:.*]] = fir.alloca !fir.box> -+! CHECK: fir.store %[[VAL_13]] to %[[VAL_14]] : !fir.ref>> - ! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32 --! CHECK: %[[VAL_16:.*]] = fir.embox %[[VAL_4]]#0(%[[VAL_3]]) : (!fir.ref>, !fir.shape<2>) -> !fir.box> --! CHECK: %[[VAL_17:.*]] = fir.alloca !fir.box> --! CHECK: fir.store %[[VAL_16]] to %[[VAL_17]] : !fir.ref>> --! CHECK: omp.wsloop reduction(@add_reduction_f64 %[[VAL_8]]#0 -> %[[VAL_18:.*]] : !fir.ref, byref @add_reduction_byref_box_3x3xf64 %[[VAL_17]] -> %[[VAL_19:.*]] : !fir.ref>>) { --! 
CHECK: omp.loop_nest (%[[VAL_20:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { -+! CHECK: %[[VAL_16:.*]] = arith.constant 10 : i32 -+! CHECK: %[[VAL_17:.*]] = arith.constant 1 : i32 -+! CHECK: omp.wsloop reduction(@add_reduction_f64 %[[VAL_8]]#0 -> %[[VAL_18:.*]] : !fir.ref, byref @add_reduction_byref_box_3x3xf64 %[[VAL_14]] -> %[[VAL_19:.*]] : !fir.ref>>) { -+! CHECK: omp.loop_nest (%[[VAL_20:.*]]) : i32 = (%[[VAL_15]]) to (%[[VAL_16]]) inclusive step (%[[VAL_17]]) { - ! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFEscalar"} : (!fir.ref) -> (!fir.ref, !fir.ref) - ! CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFEarray"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) - ! CHECK: fir.store %[[VAL_20]] to %[[VAL_12]]#1 : !fir.ref -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Transforms/DoConcurrent/basic_device.f90 llvm-project/flang/test/Transforms/DoConcurrent/basic_device.f90 ---- llvm-project.orig/flang/test/Transforms/DoConcurrent/basic_device.f90 1969-12-31 18:00:00.000000000 -0600 -+++ llvm-project/flang/test/Transforms/DoConcurrent/basic_device.f90 2024-06-12 10:44:09.355614196 -0500 + !CHECK: func.func @{{.*}}mix_use_device_ptr_and_addr() + !CHECK: omp.target_data use_device_addr(%{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}} : !fir.ref>>>, !fir.ref>>>, !fir.llvm_ptr>>, !fir.llvm_ptr>>) use_device_ptr({{.*}} : !fir.ref>) { +@@ -26,9 +26,9 @@ + real, pointer :: pa(:) + type(c_ptr) :: cptr + +- !$omp target data use_device_ptr(pa, cptr) use_device_addr(array) +- !$omp end target data +- end subroutine ++ !$omp target data use_device_ptr(pa, cptr) use_device_addr(array) ++ !$omp end target data ++end subroutine + + !CHECK: func.func @{{.*}}only_use_device_addr() + !CHECK: omp.target_data use_device_addr(%{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}} : !fir.ref>>>, !fir.ref>, !fir.ref>>>, !fir.llvm_ptr>>, !fir.llvm_ptr>>) { +@@ -38,9 +38,9 @@ + real, pointer :: pa(:) + type(c_ptr) :: cptr + +- !$omp target data use_device_addr(pa, cptr, array) +- !$omp end target data +- end subroutine ++ !$omp target data use_device_addr(pa, cptr, array) ++ !$omp end target data ++end subroutine + + !CHECK: func.func @{{.*}}mix_use_device_ptr_and_addr_and_map() + !CHECK: omp.target_data map_entries(%{{.*}}, %{{.*}} : !fir.ref, !fir.ref) use_device_addr(%{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}} : !fir.ref>>>, !fir.ref>>>, !fir.llvm_ptr>>, !fir.llvm_ptr>>) use_device_ptr(%{{.*}} : !fir.ref>) { +@@ -51,9 +51,9 @@ + real, pointer :: pa(:) + type(c_ptr) :: cptr + +- !$omp target data use_device_ptr(pa, cptr) use_device_addr(array) map(tofrom: i, j) +- !$omp end target data +- end subroutine ++ !$omp target data use_device_ptr(pa, cptr) use_device_addr(array) map(tofrom: i, j) ++ !$omp end target data ++end subroutine + + !CHECK: func.func @{{.*}}only_use_map() + !CHECK: omp.target_data map_entries(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : !fir.ref>>>, !fir.ref>, !fir.ref>>>, !fir.llvm_ptr>>, !fir.llvm_ptr>>) { +@@ -63,6 +63,6 @@ + real, pointer :: pa(:) + type(c_ptr) :: cptr + +- !$omp target data map(pa, cptr, array) +- !$omp end target data +- end subroutine ++ !$omp target data map(pa, cptr, array) ++ !$omp end target data ++end subroutine +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Semantics/OpenMP/combined-constructs.f90 
llvm-project-aso/flang/test/Semantics/OpenMP/combined-constructs.f90 +--- llvm-project-aso-orig/flang/test/Semantics/OpenMP/combined-constructs.f90 2024-11-23 20:25:26.855275120 -0600 ++++ llvm-project-aso/flang/test/Semantics/OpenMP/combined-constructs.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -1,4 +1,4 @@ +-! RUN: %python %S/../test_errors.py %s %flang -fopenmp ++! RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=11 + + program main + implicit none +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Semantics/OpenMP/ordered01.f90 llvm-project-aso/flang/test/Semantics/OpenMP/ordered01.f90 +--- llvm-project-aso-orig/flang/test/Semantics/OpenMP/ordered01.f90 2024-11-14 15:28:41.138642459 -0600 ++++ llvm-project-aso/flang/test/Semantics/OpenMP/ordered01.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -1,4 +1,4 @@ +-! RUN: %python %S/../test_errors.py %s %flang -fopenmp ++! RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51 + ! OpenMP Version 5.1 + ! Check OpenMP construct validity for the following directives: + ! 2.19.9 Ordered Construct +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Semantics/OpenMP/ordered03.f90 llvm-project-aso/flang/test/Semantics/OpenMP/ordered03.f90 +--- llvm-project-aso-orig/flang/test/Semantics/OpenMP/ordered03.f90 2024-11-14 15:28:41.138642459 -0600 ++++ llvm-project-aso/flang/test/Semantics/OpenMP/ordered03.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -1,4 +1,4 @@ +-! RUN: %python %S/../test_errors.py %s %flang -fopenmp ++! RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51 + ! OpenMP Version 5.1 + ! Check OpenMP construct validity for the following directives: + ! 2.19.9 Ordered Construct +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/basic_device.f90 llvm-project-aso/flang/test/Transforms/DoConcurrent/basic_device.f90 +--- llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/basic_device.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Transforms/DoConcurrent/basic_device.f90 2024-11-23 20:39:47.192175322 -0600 @@ -0,0 +1,86 @@ +! Tests mapping of a basic `do concurrent` loop to +! `!$omp target teams distribute parallel do`. @@ -9868,7 +9796,19 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Transforms/DoConcurre + + ! CHECK-NOT: fir.do_loop + -+ ! CHECK-DAG: %[[I_MAP_INFO:.*]] = omp.map.info var_ptr(%[[I_ORIG_DECL]]#0 ++ ! CHECK: %[[DUPLICATED_C1:.*]] = arith.constant 1 : i32 ++ ! CHECK: %[[DUPLICATED_LB:.*]] = fir.convert %[[DUPLICATED_C1]] : (i32) -> index ++ ! CHECK: %[[DUPLICATED_C10:.*]] = arith.constant 10 : i32 ++ ! CHECK: %[[DUPLICATED_UB:.*]] = fir.convert %[[DUPLICATED_C10]] : (i32) -> index ++ ! CHECK: %[[DUPLICATED_STEP:.*]] = arith.constant 1 : index ++ ++ ! CHECK: %[[C1:.*]] = arith.constant 1 : i32 ++ ! CHECK: %[[HOST_LB:.*]] = fir.convert %[[C1]] : (i32) -> index ++ ! CHECK: %[[C10:.*]] = arith.constant 10 : i32 ++ ! CHECK: %[[HOST_UB:.*]] = fir.convert %[[C10]] : (i32) -> index ++ ! CHECK: %[[HOST_STEP:.*]] = arith.constant 1 : index ++ ++ ! CHECK-DAG: %[[I_MAP_INFO:.*]] = omp.map.info var_ptr(%[[I_ORIG_DECL]]#1 + ! CHECK: %[[C0:.*]] = arith.constant 0 : index + ! CHECK: %[[UPPER_BOUND:.*]] = arith.subi %[[A_EXTENT]], %[[C0]] : index + @@ -9876,32 +9816,22 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Transforms/DoConcurre + ! CHECK-SAME: upper_bound(%[[UPPER_BOUND]] : index) + ! CHECK-SAME: extent(%[[A_EXTENT]] : index) + -+ ! 
CHECK-DAG: %[[A_MAP_INFO:.*]] = omp.map.info var_ptr(%[[A_ORIG_DECL]]#0 : {{[^(]+}}) ++ ! CHECK-DAG: %[[A_MAP_INFO:.*]] = omp.map.info var_ptr(%[[A_ORIG_DECL]]#1 : {{[^(]+}}) + ! CHECK-SAME: map_clauses(implicit, tofrom) capture(ByRef) bounds(%[[A_BOUNDS]]) + -+ ! CHECK: %[[TRIP_COUNT:.*]] = arith.muli %{{.*}}, %{{.*}} : i64 -+ -+ ! CHECK: omp.target trip_count(%[[TRIP_COUNT]] : i64) ++ ! CHECK: omp.target ++ ! CHECK-SAME: host_eval(%[[HOST_LB]] -> %[[LB:[[:alnum:]]+]], %[[HOST_UB]] -> %[[UB:[[:alnum:]]+]], %[[HOST_STEP]] -> %[[STEP:[[:alnum:]]+]] : index, index, index) + ! CHECK-SAME: map_entries(%[[I_MAP_INFO]] -> %[[I_ARG:[[:alnum:]]+]], + ! CHECK-SAME: %[[A_MAP_INFO]] -> %[[A_ARG:.[[:alnum:]]+]] + -+ ! CHECK-NEXT: ^{{.*}}(%[[I_ARG]]: !fir.ref, %[[A_ARG]]: !fir.ref>): -+ + ! CHECK: %[[A_DEV_DECL:.*]]:2 = hlfir.declare %[[A_ARG]] + ! CHECK: omp.teams { ++ ! CHECK-NEXT: omp.parallel { + + ! CHECK-NEXT: %[[ITER_VAR:.*]] = fir.alloca i32 {bindc_name = "i"} + ! CHECK-NEXT: %[[BINDING:.*]]:2 = hlfir.declare %[[ITER_VAR]] {uniq_name = "_QFEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) + -+ ! CHECK: %[[C1:.*]] = arith.constant 1 : i32 -+ ! CHECK: %[[LB:.*]] = fir.convert %[[C1]] : (i32) -> index -+ ! CHECK: %[[C10:.*]] = arith.constant 10 : i32 -+ ! CHECK: %[[UB:.*]] = fir.convert %[[C10]] : (i32) -> index -+ ! CHECK: %[[STEP:.*]] = arith.constant 1 : index -+ + ! CHECK-NEXT: omp.distribute { -+ ! CHECK-NEXT: omp.parallel { -+ + ! CHECK-NEXT: omp.wsloop { + + ! CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]]) : index = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { @@ -9915,12 +9845,10 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Transforms/DoConcurre + ! CHECK-NEXT: omp.yield + ! CHECK-NEXT: } + ++ ! CHECK-NEXT: } {omp.composite} ++ ! CHECK-NEXT: } {omp.composite} + ! CHECK-NEXT: omp.terminator -+ ! CHECK-NEXT: } -+ ! CHECK-NEXT: omp.terminator -+ ! CHECK-NEXT: } -+ ! CHECK-NEXT: omp.terminator -+ ! CHECK-NEXT: } ++ ! CHECK-NEXT: } {omp.composite} + ! CHECK-NEXT: omp.terminator + ! CHECK-NEXT: } + ! CHECK-NEXT: omp.terminator @@ -9931,10 +9859,10 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Transforms/DoConcurre + + ! CHECK-NOT: fir.do_loop +end program do_concurrent_basic -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Transforms/DoConcurrent/basic_host.f90 llvm-project/flang/test/Transforms/DoConcurrent/basic_host.f90 ---- llvm-project.orig/flang/test/Transforms/DoConcurrent/basic_host.f90 1969-12-31 18:00:00.000000000 -0600 -+++ llvm-project/flang/test/Transforms/DoConcurrent/basic_host.f90 2024-06-12 10:44:09.355614196 -0500 -@@ -0,0 +1,50 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/basic_host.f90 llvm-project-aso/flang/test/Transforms/DoConcurrent/basic_host.f90 +--- llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/basic_host.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Transforms/DoConcurrent/basic_host.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,49 @@ +! Tests mapping of a basic `do concurrent` loop to `!$omp parallel do`. + +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=host %s -o - \ @@ -9974,7 +9902,6 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Transforms/DoConcurre + ! CHECK-NEXT: hlfir.assign %[[IV_VAL1]] to %[[ARR_ACCESS]] : i32, !fir.ref + ! CHECK-NEXT: omp.yield + ! CHECK-NEXT: } -+ ! CHECK-NEXT: omp.terminator + ! CHECK-NEXT: } + + ! 
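! Illustrative sketch (not part of this patch): the host mapping that
! basic_host.f90 checks. Under -fdo-concurrent-parallel=host the first loop is
! expected to behave like the hand-written worksharing loop that follows.
! Variable names are hypothetical; -fopenmp is assumed.
program do_concurrent_host_sketch
  implicit none
  integer :: i
  integer :: a(10), b(10)

  ! Mapped form: rewritten by the pass to omp.parallel + omp.wsloop.
  do concurrent (i = 1:10)
    a(i) = i
  end do

  ! Hand-written equivalent of what the host mapping targets.
  !$omp parallel do
  do i = 1, 10
    b(i) = i
  end do
  !$omp end parallel do

  print *, all(a == b)   ! expected to print T
end program do_concurrent_host_sketch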
CHECK-NEXT: omp.terminator @@ -9985,10 +9912,10 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Transforms/DoConcurre + + ! CHECK-NOT: fir.do_loop +end program do_concurrent_basic -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Transforms/DoConcurrent/basic_host.mlir llvm-project/flang/test/Transforms/DoConcurrent/basic_host.mlir ---- llvm-project.orig/flang/test/Transforms/DoConcurrent/basic_host.mlir 1969-12-31 18:00:00.000000000 -0600 -+++ llvm-project/flang/test/Transforms/DoConcurrent/basic_host.mlir 2024-06-12 10:44:09.355614196 -0500 -@@ -0,0 +1,63 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/basic_host.mlir llvm-project-aso/flang/test/Transforms/DoConcurrent/basic_host.mlir +--- llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/basic_host.mlir 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Transforms/DoConcurrent/basic_host.mlir 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,62 @@ +// Tests mapping of a basic `do concurrent` loop to `!$omp parallel do`. + +// RUN: fir-opt --fopenmp-do-concurrent-conversion="map-to=host" %s | FileCheck %s @@ -10033,7 +9960,6 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Transforms/DoConcurre + // CHECK-NEXT: hlfir.assign %[[IV_VAL1]] to %[[ARR_ACCESS]] : i32, !fir.ref + // CHECK-NEXT: omp.yield + // CHECK-NEXT: } -+ // CHECK-NEXT: omp.terminator + // CHECK-NEXT: } + + // CHECK-NEXT: omp.terminator @@ -10052,10 +9978,533 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/test/Transforms/DoConcurre + + return + } -diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/tools/bbc/bbc.cpp llvm-project/flang/tools/bbc/bbc.cpp ---- llvm-project.orig/flang/tools/bbc/bbc.cpp 2024-06-12 10:43:12.700209654 -0500 -+++ llvm-project/flang/tools/bbc/bbc.cpp 2024-06-12 10:44:09.355614196 -0500 -@@ -139,6 +139,12 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/locally_destroyed_temp.f90 llvm-project-aso/flang/test/Transforms/DoConcurrent/locally_destroyed_temp.f90 +--- llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/locally_destroyed_temp.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Transforms/DoConcurrent/locally_destroyed_temp.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,76 @@ ++! Tests that locally destroyed values in a `do concurrent` loop are properly ++! handled. Locally destroyed values are those values for which the Fortran runtime ++! calls `@_FortranADestroy` inside the loops body. If these values are allocated ++! outside the loop, and the loop is mapped to OpenMP, then a runtime error would ++! occur due to multiple teams trying to access the same allocation. ++ ++! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=host %s -o - \ ++! RUN: | FileCheck %s --check-prefixes=COMMON ++ ++! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=device %s -o - \ ++! 
RUN: | FileCheck %s --check-prefixes=COMMON,DEVICE ++ ++module struct_mod ++ type test_struct ++ integer, allocatable :: x_ ++ end type ++ ++ interface test_struct ++ pure module function construct_from_components(x) result(struct) ++ implicit none ++ integer, intent(in) :: x ++ type(test_struct) struct ++ end function ++ end interface ++end module ++ ++submodule(struct_mod) struct_sub ++ implicit none ++ ++contains ++ module procedure construct_from_components ++ struct%x_ = x ++ end procedure ++end submodule struct_sub ++ ++program main ++ use struct_mod, only : test_struct ++ ++ implicit none ++ type(test_struct), dimension(10) :: a ++ integer :: i ++ integer :: total ++ ++ do concurrent (i=1:10) ++ a(i) = test_struct(i) ++ end do ++ ++ do i=1,10 ++ total = total + a(i)%x_ ++ end do ++ ++ print *, "total =", total ++end program main ++ ++! DEVICE: omp.target {{.*}} { ++! DEVICE: omp.teams { ++! COMMON: omp.parallel { ++! COMMON: %[[LOCAL_TEMP:.*]] = fir.alloca !fir.type<_QMstruct_modTtest_struct{x_:!fir.box>}> {bindc_name = ".result"} ++! DEVICE: omp.distribute { ++! COMMON: omp.wsloop { ++! COMMON: omp.loop_nest {{.*}} { ++! COMMON: %[[TEMP_VAL:.*]] = fir.call @_QMstruct_modPconstruct_from_components ++! COMMON: fir.save_result %[[TEMP_VAL]] to %[[LOCAL_TEMP]] ++! COMMON: %[[EMBOXED_LOCAL:.*]] = fir.embox %[[LOCAL_TEMP]] ++! COMMON: %[[CONVERTED_LOCAL:.*]] = fir.convert %[[EMBOXED_LOCAL]] ++! COMMON: fir.call @_FortranADestroy(%[[CONVERTED_LOCAL]]) ++! COMMON: omp.yield ++! COMMON: } ++! COMMON: } ++! DEVICE: } ++! COMMON: omp.terminator ++! COMMON: } ++! DEVICE: omp.terminator ++! DEVICE: } ++! DEVICE: omp.terminator ++! DEVICE: } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/loop_nest_test.f90 llvm-project-aso/flang/test/Transforms/DoConcurrent/loop_nest_test.f90 +--- llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/loop_nest_test.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Transforms/DoConcurrent/loop_nest_test.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,87 @@ ++! Tests loop-nest detection algorithm for do-concurrent mapping. ++ ++! REQUIRES: asserts ++ ++! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=host \ ++! RUN: -mmlir -debug %s -o - 2> %t.log || true ++ ++! RUN: FileCheck %s < %t.log ++ ++program main ++ implicit none ++ ++contains ++ ++subroutine foo(n) ++ implicit none ++ integer :: n, m ++ integer :: i, j, k ++ integer :: x ++ integer, dimension(n) :: a ++ integer, dimension(n, n, n) :: b ++ ++ ! CHECK: Loop pair starting at location ++ ! CHECK: loc("{{.*}}":[[# @LINE + 1]]:{{.*}}) is perfectly nested ++ do concurrent(i=1:n, j=1:bar(n*m, n/m)) ++ a(i) = n ++ end do ++ ++ ! CHECK: Loop pair starting at location ++ ! CHECK: loc("{{.*}}":[[# @LINE + 1]]:{{.*}}) is perfectly nested ++ do concurrent(i=bar(n, x):n, j=1:bar(n*m, n/m)) ++ a(i) = n ++ end do ++ ++ ! CHECK: Loop pair starting at location ++ ! CHECK: loc("{{.*}}":[[# @LINE + 1]]:{{.*}}) is not perfectly nested ++ do concurrent(i=bar(n, x):n) ++ do concurrent(j=1:bar(n*m, n/m)) ++ a(i) = n ++ end do ++ end do ++ ++ ! CHECK: Loop pair starting at location ++ ! CHECK: loc("{{.*}}":[[# @LINE + 1]]:{{.*}}) is not perfectly nested ++ do concurrent(i=1:n) ++ x = 10 ++ do concurrent(j=1:m) ++ b(i,j,k) = i * j + k ++ end do ++ end do ++ ++ ! CHECK: Loop pair starting at location ++ ! 
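! Illustrative sketch (not part of this patch): the nesting property that
! loop_nest_test.f90 probes. A single do concurrent header with several ranges
! counts as one perfect nest, while an extra statement between two separately
! written do concurrent loops makes the pair imperfectly nested, so only the
! outer loop remains a mapping candidate. Names and bounds are hypothetical.
subroutine nesting_sketch(n, m, b)
  implicit none
  integer, intent(in) :: n, m
  integer, intent(inout) :: b(n, m)
  integer :: i, j, x

  ! Perfectly nested: one header, two ranges.
  do concurrent (i = 1:n, j = 1:m)
    b(i, j) = i + j
  end do

  ! Not perfectly nested: the assignment to x sits between the two loops.
  do concurrent (i = 1:n)
    x = 10
    do concurrent (j = 1:m)
      b(i, j) = i * j
    end do
  end do
end subroutine nesting_sketch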
CHECK: loc("{{.*}}":[[# @LINE + 1]]:{{.*}}) is not perfectly nested ++ do concurrent(i=1:n) ++ do concurrent(j=1:m) ++ b(i,j,k) = i * j + k ++ end do ++ x = 10 ++ end do ++ ++ ! CHECK: Loop pair starting at location ++ ! CHECK: loc("{{.*}}":[[# @LINE + 1]]:{{.*}}) is not perfectly nested ++ do concurrent(i=1:n) ++ do concurrent(j=1:m) ++ b(i,j,k) = i * j + k ++ x = 10 ++ end do ++ end do ++ ++ ! CHECK: Loop pair starting at location ++ ! CHECK: loc("{{.*}}":[[# @LINE + 1]]:{{.*}}) is perfectly nested ++ do concurrent(i=bar(n, x):n, j=1:bar(n*m, n/m), k=1:bar(n*m, bar(n*m, n/m))) ++ a(i) = n ++ end do ++ ++ ++end subroutine ++ ++pure function bar(n, m) ++ implicit none ++ integer, intent(in) :: n, m ++ integer :: bar ++ ++ bar = n + m ++end function ++ ++end program main +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 llvm-project-aso/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 +--- llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,118 @@ ++! Tests mapping of a `do concurrent` loop with multiple iteration ranges. ++ ++! RUN: split-file %s %t ++ ++! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=host %t/multi_range.f90 -o - \ ++! RUN: | FileCheck %s --check-prefixes=HOST,COMMON ++ ++! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=device %t/multi_range.f90 -o - \ ++! RUN: | FileCheck %s --check-prefixes=DEVICE,COMMON ++ ++!--- multi_range.f90 ++program main ++ integer, parameter :: n = 20 ++ integer, parameter :: m = 40 ++ integer, parameter :: l = 60 ++ integer :: a(n, m, l) ++ ++ do concurrent(i=3:n, j=5:m, k=7:l) ++ a(i,j,k) = i * j + k ++ end do ++end ++ ++! COMMON: func.func @_QQmain ++ ++! DEVICE: %[[DUPLICATED_C3:.*]] = arith.constant 3 : i32 ++! DEVICE: %[[DUPLICATED_LB_I:.*]] = fir.convert %[[DUPLICATED_C3]] : (i32) -> index ++! DEVICE: %[[DUPLICATED_C20:.*]] = arith.constant 20 : i32 ++! DEVICE: %[[DUPLICATED_UB_I:.*]] = fir.convert %[[DUPLICATED_C20]] : (i32) -> index ++! DEVICE: %[[DUPLICATED_STEP_I:.*]] = arith.constant 1 : index ++ ++! DEVICE: %[[C3:.*]] = arith.constant 3 : i32 ++! DEVICE: %[[HOST_LB_I:.*]] = fir.convert %[[C3]] : (i32) -> index ++! DEVICE: %[[C20:.*]] = arith.constant 20 : i32 ++! DEVICE: %[[HOST_UB_I:.*]] = fir.convert %[[C20]] : (i32) -> index ++! DEVICE: %[[HOST_STEP_I:.*]] = arith.constant 1 : index ++ ++! DEVICE: %[[C5:.*]] = arith.constant 5 : i32 ++! DEVICE: %[[HOST_LB_J:.*]] = fir.convert %[[C5]] : (i32) -> index ++! DEVICE: %[[C40:.*]] = arith.constant 40 : i32 ++! DEVICE: %[[HOST_UB_J:.*]] = fir.convert %[[C40]] : (i32) -> index ++! DEVICE: %[[HOST_STEP_J:.*]] = arith.constant 1 : index ++ ++! DEVICE: %[[C7:.*]] = arith.constant 7 : i32 ++! DEVICE: %[[HOST_LB_K:.*]] = fir.convert %[[C7]] : (i32) -> index ++! DEVICE: %[[C60:.*]] = arith.constant 60 : i32 ++! DEVICE: %[[HOST_UB_K:.*]] = fir.convert %[[C60]] : (i32) -> index ++! DEVICE: %[[HOST_STEP_K:.*]] = arith.constant 1 : index ++ ++! DEVICE: omp.target host_eval( ++! DEVICE-SAME: %[[HOST_LB_I]] -> %[[LB_I:[[:alnum:]]+]], ++! DEVICE-SAME: %[[HOST_UB_I]] -> %[[UB_I:[[:alnum:]]+]], ++! DEVICE-SAME: %[[HOST_STEP_I]] -> %[[STEP_I:[[:alnum:]]+]], ++! DEVICE-SAME: %[[HOST_LB_J]] -> %[[LB_J:[[:alnum:]]+]], ++! DEVICE-SAME: %[[HOST_UB_J]] -> %[[UB_J:[[:alnum:]]+]], ++! 
DEVICE-SAME: %[[HOST_STEP_J]] -> %[[STEP_J:[[:alnum:]]+]], ++! DEVICE-SAME: %[[HOST_LB_K]] -> %[[LB_K:[[:alnum:]]+]], ++! DEVICE-SAME: %[[HOST_UB_K]] -> %[[UB_K:[[:alnum:]]+]], ++! DEVICE-SAME: %[[HOST_STEP_K]] -> %[[STEP_K:[[:alnum:]]+]] : ++! DEVICE-SAME: index, index, index, index, index, index, index, index, index) ++ ++! DEVICE: omp.teams ++ ++! HOST-NOT: omp.target ++! HOST-NOT: omp.teams ++ ++! COMMON: omp.parallel { ++ ++! COMMON-NEXT: %[[ITER_VAR_I:.*]] = fir.alloca i32 {bindc_name = "i"} ++! COMMON-NEXT: %[[BINDING_I:.*]]:2 = hlfir.declare %[[ITER_VAR_I]] {uniq_name = "_QFEi"} ++ ++! COMMON-NEXT: %[[ITER_VAR_J:.*]] = fir.alloca i32 {bindc_name = "j"} ++! COMMON-NEXT: %[[BINDING_J:.*]]:2 = hlfir.declare %[[ITER_VAR_J]] {uniq_name = "_QFEj"} ++ ++! COMMON-NEXT: %[[ITER_VAR_K:.*]] = fir.alloca i32 {bindc_name = "k"} ++! COMMON-NEXT: %[[BINDING_K:.*]]:2 = hlfir.declare %[[ITER_VAR_K]] {uniq_name = "_QFEk"} ++ ++! HOST: %[[C3:.*]] = arith.constant 3 : i32 ++! HOST: %[[LB_I:.*]] = fir.convert %[[C3]] : (i32) -> index ++! HOST: %[[C20:.*]] = arith.constant 20 : i32 ++! HOST: %[[UB_I:.*]] = fir.convert %[[C20]] : (i32) -> index ++! HOST: %[[STEP_I:.*]] = arith.constant 1 : index ++ ++! HOST: %[[C5:.*]] = arith.constant 5 : i32 ++! HOST: %[[LB_J:.*]] = fir.convert %[[C5]] : (i32) -> index ++! HOST: %[[C40:.*]] = arith.constant 40 : i32 ++! HOST: %[[UB_J:.*]] = fir.convert %[[C40]] : (i32) -> index ++! HOST: %[[STEP_J:.*]] = arith.constant 1 : index ++ ++! HOST: %[[C7:.*]] = arith.constant 7 : i32 ++! HOST: %[[LB_K:.*]] = fir.convert %[[C7]] : (i32) -> index ++! HOST: %[[C60:.*]] = arith.constant 60 : i32 ++! HOST: %[[UB_K:.*]] = fir.convert %[[C60]] : (i32) -> index ++! HOST: %[[STEP_K:.*]] = arith.constant 1 : index ++ ++! DEVICE: omp.distribute ++ ++! COMMON: omp.wsloop { ++! COMMON-NEXT: omp.loop_nest ++! COMMON-SAME: (%[[ARG0:[^[:space:]]+]], %[[ARG1:[^[:space:]]+]], %[[ARG2:[^[:space:]]+]]) ++! COMMON-SAME: : index = (%[[LB_I]], %[[LB_J]], %[[LB_K]]) ++! COMMON-SAME: to (%[[UB_I]], %[[UB_J]], %[[UB_K]]) inclusive ++! COMMON-SAME: step (%[[STEP_I]], %[[STEP_J]], %[[STEP_K]]) { ++ ++! COMMON-NEXT: %[[IV_IDX_I:.*]] = fir.convert %[[ARG0]] ++! COMMON-NEXT: fir.store %[[IV_IDX_I]] to %[[BINDING_I]]#1 ++ ++! COMMON-NEXT: %[[IV_IDX_J:.*]] = fir.convert %[[ARG1]] ++! COMMON-NEXT: fir.store %[[IV_IDX_J]] to %[[BINDING_J]]#1 ++ ++! COMMON-NEXT: %[[IV_IDX_K:.*]] = fir.convert %[[ARG2]] ++! COMMON-NEXT: fir.store %[[IV_IDX_K]] to %[[BINDING_K]]#1 ++ ++! COMMON: omp.yield ++! COMMON-NEXT: } ++! COMMON-NEXT: } ++ ++! HOST-NEXT: omp.terminator ++! HOST-NEXT: } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/non_const_bounds.f90 llvm-project-aso/flang/test/Transforms/DoConcurrent/non_const_bounds.f90 +--- llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/non_const_bounds.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Transforms/DoConcurrent/non_const_bounds.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,44 @@ ++! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=host %s -o - \ ++! RUN: | FileCheck %s ++ ++program main ++ implicit none ++ ++ call foo(10) ++ ++ contains ++ subroutine foo(n) ++ implicit none ++ integer :: n ++ integer :: i ++ integer, dimension(n) :: a ++ ++ do concurrent(i=1:n) ++ a(i) = i ++ end do ++ end subroutine ++ ++end program main ++ ++! CHECK: %[[N_DECL:.*]]:2 = hlfir.declare %{{.*}} dummy_scope %{{.*}} {uniq_name = "_QFFfooEn"} ++! CHECK: fir.load ++! 
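! Illustrative sketch (not part of this patch): the non-constant bound case
! covered by non_const_bounds.f90. The literal lower bound can be
! re-materialized inside the parallel region, while the upper bound n is only
! known at run time and so the value computed outside the loop has to be used.
! Names are hypothetical.
subroutine non_const_bound_sketch(n, a)
  implicit none
  integer, intent(in) :: n
  integer, intent(out) :: a(n)
  integer :: i

  ! Upper bound is a dummy argument, so it is not a compile-time constant.
  do concurrent (i = 1:n)
    a(i) = i
  end do
end subroutine non_const_bound_sketch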
CHECK: %[[N_VAL:.*]] = fir.load %[[N_DECL]]#0 : !fir.ref ++ ++! CHECK: omp.parallel { ++ ++! Verify the constant chain of ops for the lower bound are cloned in the region. ++! CHECK: %[[C1:.*]] = arith.constant 1 : i32 ++! CHECK: %[[LB:.*]] = fir.convert %[[C1]] : (i32) -> index ++ ++! Verify that we restort to using the outside value for the upper bound since it ++! is not originally a constant. ++! CHECK: %[[UB:.*]] = fir.convert %[[N_VAL]] : (i32) -> index ++ ++! CHECK: omp.wsloop { ++! CHECK: omp.loop_nest (%{{.*}}) : index = (%[[LB]]) to (%[[UB]]) inclusive step (%{{.*}}) { ++! CHECK: omp.yield ++! CHECK: } ++! CHECK: } ++! CHECK: omp.terminator ++! CHECK: } ++ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 llvm-project-aso/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 +--- llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,65 @@ ++! Tests that if `do concurrent` is not perfectly nested in its parent loop, that ++! we skip converting the not-perfectly nested `do concurrent` loop. ++ ++ ++! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=host %s -o - \ ++! RUN: | FileCheck %s --check-prefixes=HOST,COMMON ++ ++! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=device %s -o - \ ++! RUN: | FileCheck %s --check-prefixes=DEVICE,COMMON ++ ++program main ++ integer, parameter :: n = 10 ++ integer, parameter :: m = 20 ++ integer, parameter :: l = 30 ++ integer x; ++ integer :: a(n, m, l) ++ ++ do concurrent(i=1:n) ++ x = 10 ++ do concurrent(j=1:m, k=1:l) ++ a(i,j,k) = i * j + k ++ end do ++ end do ++end ++ ++! HOST: %[[ORIG_K_ALLOC:.*]] = fir.alloca i32 {bindc_name = "k"} ++! HOST: %[[ORIG_K_DECL:.*]]:2 = hlfir.declare %[[ORIG_K_ALLOC]] ++ ++! HOST: %[[ORIG_J_ALLOC:.*]] = fir.alloca i32 {bindc_name = "j"} ++! HOST: %[[ORIG_J_DECL:.*]]:2 = hlfir.declare %[[ORIG_J_ALLOC]] ++ ++! DEVICE: omp.target {{.*}}map_entries(%{{[^[:space:]]+}} -> %[[I_ARG:[^,]+]], ++! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[X_ARG:[^,]+]], ++! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[J_ARG:[^,]+]], ++! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[K_ARG:[^,]+]], ++! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[A_ARG:[^:]+]]: ++! DEVICE-SAME: !fir.ref, !fir.ref, !fir.ref, !fir.ref, !fir.ref>) { ++ ++! DEVICE: %[[TARGET_J_DECL:.*]]:2 = hlfir.declare %[[J_ARG]] {uniq_name = "_QFEj"} ++! DEVICE: %[[TARGET_K_DECL:.*]]:2 = hlfir.declare %[[K_ARG]] {uniq_name = "_QFEk"} ++ ++! DEVICE: omp.teams ++ ++! COMMON: omp.parallel { ++ ++! DEVICE: omp.distribute ++ ++! COMMON: omp.wsloop { ++! COMMON: omp.loop_nest ({{[^[:space:]]+}}) {{.*}} { ++! COMMON: fir.do_loop %[[J_IV:.*]] = {{.*}} { ++! COMMON: %[[J_IV_CONV:.*]] = fir.convert %[[J_IV]] : (index) -> i32 ++! HOST: fir.store %[[J_IV_CONV]] to %[[ORIG_J_DECL]]#1 ++! DEVICE: fir.store %[[J_IV_CONV]] to %[[TARGET_J_DECL]]#1 ++ ++! COMMON: fir.do_loop %[[K_IV:.*]] = {{.*}} { ++! COMMON: %[[K_IV_CONV:.*]] = fir.convert %[[K_IV]] : (index) -> i32 ++! HOST: fir.store %[[K_IV_CONV]] to %[[ORIG_K_DECL]]#1 ++! DEVICE: fir.store %[[K_IV_CONV]] to %[[TARGET_K_DECL]]#1 ++! COMMON: } ++! COMMON: } ++! COMMON: omp.yield ++! COMMON: } ++! COMMON: } ++! COMMON: omp.terminator ++! 
COMMON: } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 llvm-project-aso/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 +--- llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,42 @@ ++! Tests `do concurrent` mapping when mapped value(s) depend on values defined ++! outside the target region; e.g. the size of the array is dynamic. This needs ++! to be handled by localizing these region outsiders by either cloning them in ++! the region or in case we cannot do that, map them and use the mapped values. ++ ++! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=device %s -o - \ ++! RUN: | FileCheck %s ++ ++subroutine foo(n) ++ implicit none ++ integer :: n ++ integer :: i ++ integer, dimension(n) :: a ++ ++ do concurrent(i=1:10) ++ a(i) = i ++ end do ++end subroutine ++ ++! CHECK-DAG: %[[I_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFfooEi"} ++! CHECK-DAG: %[[A_DECL:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFfooEa"} ++! CHECK-DAG: %[[N_ALLOC:.*]] = fir.alloca i32 ++ ++! CHECK-DAG: %[[I_MAP:.*]] = omp.map.info var_ptr(%[[I_DECL]]#1 : {{.*}}) ++! CHECK-DAG: %[[A_MAP:.*]] = omp.map.info var_ptr(%[[A_DECL]]#1 : {{.*}}) ++! CHECK-DAG: %[[N_MAP:.*]] = omp.map.info var_ptr(%[[N_ALLOC]] : {{.*}}) ++ ++! CHECK: omp.target ++! CHECK-SAME: map_entries(%[[I_MAP]] -> %[[I_ARG:arg[0-9]*]], ++! CHECK-SAME: %[[A_MAP]] -> %[[A_ARG:arg[0-9]*]], ++! CHECK-SAME: %[[N_MAP]] -> %[[N_ARG:arg[0-9]*]] : {{.*}}) ++! CHECK-SAME: {{.*}} { ++ ++! CHECK-DAG: %{{.*}} = hlfir.declare %[[I_ARG]] ++! CHECK-DAG: %{{.*}} = hlfir.declare %[[A_ARG]] ++! CHECK-DAG: %{{.*}} = fir.load %[[N_ARG]] ++ ++! CHECK: omp.terminator ++! CHECK: } ++ ++ ++ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 llvm-project-aso/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 +--- llvm-project-aso-orig/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 2024-11-23 20:39:47.192175322 -0600 +@@ -0,0 +1,63 @@ ++! Tests that if `do concurrent` is indirectly nested in its parent loop, that we ++! skip converting the indirectly nested `do concurrent` loop. ++ ++! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=host %s -o - \ ++! RUN: | FileCheck %s --check-prefixes=HOST,COMMON ++ ++! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=device %s -o - \ ++! RUN: | FileCheck %s --check-prefixes=DEVICE,COMMON ++ ++program main ++ integer, parameter :: n = 10 ++ integer, parameter :: m = 20 ++ integer, parameter :: l = 30 ++ integer x; ++ integer :: a(n, m, l) ++ ++ do concurrent(i=1:n) ++ do j=1,m ++ do concurrent(k=1:l) ++ a(i,j,k) = i * j + k ++ end do ++ end do ++ end do ++end ++ ++! HOST: %[[ORIG_K_ALLOC:.*]] = fir.alloca i32 {bindc_name = "k"} ++! HOST: %[[ORIG_K_DECL:.*]]:2 = hlfir.declare %[[ORIG_K_ALLOC]] ++ ++! HOST: %[[ORIG_J_ALLOC:.*]] = fir.alloca i32 {bindc_name = "j", {{.*}}} ++! HOST: %[[ORIG_J_DECL:.*]]:2 = hlfir.declare %[[ORIG_J_ALLOC]] ++ ++! DEVICE: omp.target {{.*}}map_entries(%{{[^[:space:]]+}} -> %[[I_ARG:[^,]+]], ++! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[J_ARG:[^,]+]], ++! 
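! Illustrative sketch (not part of this patch): the shape of loop that
! skip_all_nested_loops.f90 relies on. Because an ordinary do loop sits between
! the two do concurrent loops, only the outermost loop is converted; the inner
! do concurrent stays a regular fir.do_loop nest. Sizes are hypothetical.
program skip_nested_sketch
  implicit none
  integer, parameter :: n = 10, m = 20, l = 30
  integer :: a(n, m, l)
  integer :: i, j, k

  do concurrent (i = 1:n)      ! candidate for the OpenMP worksharing loop
    do j = 1, m                ! ordinary loop: breaks direct nesting
      do concurrent (k = 1:l)  ! expected to be left unconverted
        a(i, j, k) = i * j + k
      end do
    end do
  end do

  print *, a(1, 1, 1), a(n, m, l)
end program skip_nested_sketch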
DEVICE-SAME: %{{[^[:space:]]+}} -> %[[K_ARG:[^,]+]], ++! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[A_ARG:[^:]+]]: ++! DEVICE-SAME: !fir.ref, !fir.ref, !fir.ref, !fir.ref>) { ++ ++! DEVICE: %[[TARGET_J_DECL:.*]]:2 = hlfir.declare %[[J_ARG]] {uniq_name = "_QFEj"} ++! DEVICE: %[[TARGET_K_DECL:.*]]:2 = hlfir.declare %[[K_ARG]] {uniq_name = "_QFEk"} ++ ++! DEVICE: omp.teams ++ ++! COMMON: omp.parallel { ++ ++! DEVICE: omp.distribute ++ ++! COMMON: omp.wsloop { ++! COMMON: omp.loop_nest ({{[^[:space:]]+}}) {{.*}} { ++! COMMON: fir.do_loop {{.*}} iter_args(%[[J_IV:.*]] = {{.*}}) -> {{.*}} { ++! HOST: fir.store %[[J_IV]] to %[[ORIG_J_DECL]]#1 ++! DEVICE: fir.store %[[J_IV]] to %[[TARGET_J_DECL]]#1 ++ ++! COMMON: fir.do_loop %[[K_IV:.*]] = {{.*}} { ++! COMMON: %[[K_IV_CONV:.*]] = fir.convert %[[K_IV]] : (index) -> i32 ++! HOST: fir.store %[[K_IV_CONV]] to %[[ORIG_K_DECL]]#1 ++! DEVICE: fir.store %[[K_IV_CONV]] to %[[TARGET_K_DECL]]#1 ++! COMMON: } ++! COMMON: } ++! COMMON: omp.yield ++! COMMON: } ++! COMMON: } ++! COMMON: omp.terminator ++! COMMON: } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/flang/tools/bbc/bbc.cpp llvm-project-aso/flang/tools/bbc/bbc.cpp +--- llvm-project-aso-orig/flang/tools/bbc/bbc.cpp 2024-11-23 20:25:26.855275120 -0600 ++++ llvm-project-aso/flang/tools/bbc/bbc.cpp 2024-11-23 20:39:47.192175322 -0600 +@@ -142,6 +142,12 @@ llvm::cl::desc("enable openmp device compilation"), llvm::cl::init(false)); @@ -10068,44 +10517,52 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/flang/tools/bbc/bbc.cpp llvm-pro static llvm::cl::opt enableOpenMPGPU("fopenmp-is-gpu", llvm::cl::desc("enable openmp GPU target codegen"), -@@ -272,7 +278,18 @@ - static mlir::LogicalResult runOpenMPPasses(mlir::ModuleOp mlirModule) { +@@ -163,7 +169,7 @@ + static llvm::cl::opt + setOpenMPVersion("fopenmp-version", + llvm::cl::desc("OpenMP standard version"), +- llvm::cl::init(11)); ++ llvm::cl::init(52)); + + static llvm::cl::opt setOpenMPTargetDebug( + "fopenmp-target-debug", +@@ -287,7 +293,19 @@ + static llvm::LogicalResult runOpenMPPasses(mlir::ModuleOp mlirModule) { mlir::PassManager pm(mlirModule->getName(), mlir::OpPassManager::Nesting::Implicit); - fir::createOpenMPFIRPassPipeline(pm, enableOpenMPDevice); + using DoConcurrentMappingKind = + Fortran::frontend::CodeGenOptions::DoConcurrentMappingKind; + -+ auto doConcurrentMappingKind = ++ fir::OpenMPFIRPassPipelineOpts opts; ++ opts.isTargetDevice = enableOpenMPDevice; ++ opts.doConcurrentMappingKind = + llvm::StringSwitch( + enableDoConcurrentToOpenMPConversion) + .Case("host", DoConcurrentMappingKind::DCMK_Host) + .Case("device", DoConcurrentMappingKind::DCMK_Device) + .Default(DoConcurrentMappingKind::DCMK_None); + -+ fir::createOpenMPFIRPassPipeline(pm, enableOpenMPDevice, -+ doConcurrentMappingKind); ++ fir::createOpenMPFIRPassPipeline(pm, opts); (void)mlir::applyPassManagerCLOptions(pm); if (mlir::failed(pm.run(mlirModule))) { llvm::errs() << "FATAL: failed to correctly apply OpenMP pass pipeline"; -diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h ---- llvm-project.orig/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h 2024-06-12 10:43:13.184204565 -0500 -+++ llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h 2024-06-12 10:44:09.355614196 -0500 -@@ -16,6 +16,7 @@ - - #include "llvm/Analysis/MemorySSAUpdater.h" - #include "llvm/Frontend/OpenMP/OMPConstants.h" -+#include "llvm/Frontend/OpenMP/OMPGridValues.h" - #include 
"llvm/IR/DebugLoc.h" - #include "llvm/IR/IRBuilder.h" - #include "llvm/Support/Allocator.h" -@@ -99,14 +100,18 @@ - /// expanded. - std::optional IsGPU; - -- // Flag for specifying if offloading is mandatory. -+ /// Flag for specifying if offloading is mandatory. - std::optional OpenMPOffloadMandatory; +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/.github/workflows/release-binaries.yml llvm-project-aso/.github/workflows/release-binaries.yml +--- llvm-project-aso-orig/.github/workflows/release-binaries.yml 2024-10-18 17:40:31.952997950 -0500 ++++ llvm-project-aso/.github/workflows/release-binaries.yml 2024-11-23 20:39:47.192175322 -0600 +@@ -328,7 +328,7 @@ + run: | + # Build some of the mlir tools that take a long time to link + if [ "${{ needs.prepare.outputs.build-flang }}" = "true" ]; then +- ninja -C ${{ steps.setup-stage.outputs.build-prefix }}/build/tools/clang/stage2-bins/ -j2 flang bbc ++ ninja -C ${{ steps.setup-stage.outputs.build-prefix }}/build/tools/clang/stage2-bins/ -j2 flang-new bbc + fi + ninja -C ${{ steps.setup-stage.outputs.build-prefix }}/build/tools/clang/stage2-bins/ \ + mlir-bytecode-parser-fuzzer \ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h llvm-project-aso/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +--- llvm-project-aso-orig/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h 2024-10-29 11:07:19.577634859 -0500 ++++ llvm-project-aso/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h 2024-11-23 20:39:47.196175308 -0600 +@@ -110,7 +110,8 @@ /// First separator used between the initial two parts of a name. std::optional FirstSeparator; @@ -10114,83 +10571,88 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/include/llvm/Frontend/OpenM + /// Separator used between all of the rest consecutive parts of a name std::optional Separator; -+ // Grid Value for the GPU target -+ std::optional GridValue; -+ - OpenMPIRBuilderConfig(); - OpenMPIRBuilderConfig(bool IsTargetDevice, bool IsGPU, - bool OpenMPOffloadMandatory, -@@ -132,6 +137,11 @@ - return *OpenMPOffloadMandatory; - } + // Grid Value for the GPU target +@@ -727,13 +728,12 @@ + LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, + const Twine &Name = "loop"); -+ omp::GV getGridValue() const { -+ assert(GridValue.has_value() && "GridValue is not set"); -+ return *GridValue; -+ } +- /// Generator for the control flow structure of an OpenMP canonical loop. ++ /// Calculate the trip count of a canonical loop. + /// +- /// Instead of a logical iteration space, this allows specifying user-defined +- /// loop counter values using increment, upper- and lower bounds. To +- /// disambiguate the terminology when counting downwards, instead of lower +- /// bounds we use \p Start for the loop counter value in the first body +- /// iteration. ++ /// This allows specifying user-defined loop counter values using increment, ++ /// upper- and lower bounds. To disambiguate the terminology when counting ++ /// downwards, instead of lower bounds we use \p Start for the loop counter ++ /// value in the first body iteration. + /// + /// Consider the following limitations: + /// +@@ -757,7 +757,32 @@ + /// + /// for (int i = 0; i < 42; i -= 1u) + /// +- // ++ /// \param Loc The insert and source location description. ++ /// \param Start Value of the loop counter for the first iterations. ++ /// \param Stop Loop counter values past this will stop the loop. ++ /// \param Step Loop counter increment after each iteration; negative ++ /// means counting down. 
++ /// \param IsSigned Whether Start, Stop and Step are signed integers. ++ /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop ++ /// counter. ++ /// \param Name Base name used to derive instruction names. ++ /// ++ /// \returns The value holding the calculated trip count. ++ Value *calculateCanonicalLoopTripCount(const LocationDescription &Loc, ++ Value *Start, Value *Stop, Value *Step, ++ bool IsSigned, bool InclusiveStop, ++ const Twine &Name = "loop"); + - bool hasRequiresFlags() const { return RequiresFlags; } - bool hasRequiresReverseOffload() const; - bool hasRequiresUnifiedAddress() const; -@@ -167,6 +177,7 @@ - void setOpenMPOffloadMandatory(bool Value) { OpenMPOffloadMandatory = Value; } - void setFirstSeparator(StringRef FS) { FirstSeparator = FS; } - void setSeparator(StringRef S) { Separator = S; } -+ void setGridValue(omp::GV G) { GridValue = G; } - - void setHasRequiresReverseOffload(bool Value); - void setHasRequiresUnifiedAddress(bool Value); -@@ -1235,12 +1246,14 @@ - getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, - StringRef ParentName = ""); - -+ /// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used -+ /// to -+ /// store lambdas with capture. - /// Functions used to generate reductions. Such functions take two Values - /// representing LHS and RHS of the reduction, respectively, and a reference - /// to the value that is updated to refer to the reduction result. - using ReductionGenTy = - function_ref; -- - /// Functions used to generate atomic reductions. Such functions take two - /// Values representing pointers to LHS and RHS of the reduction, as well as - /// the element type of these pointers. They are expected to atomically -@@ -1278,6 +1291,22 @@ - AtomicReductionGenTy AtomicReductionGen; - }; - -+ /// \param Loc The location where the reduction was -+ /// encountered. Must be within the associate -+ /// directive and after the last local access to the -+ /// reduction variables. -+ /// \param AllocaIP An insertion point suitable for allocas usable -+ /// in reductions. -+ /// \param ReductionInfos A list of info on each reduction variable. -+ /// \param IsNoWait A flag set if the reduction is marked as nowait. -+ InsertPointTy createReductionsGPU(const LocationDescription &Loc, -+ InsertPointTy AllocaIP, -+ ArrayRef ReductionInfos, -+ ArrayRef IsByRef, -+ bool IsNoWait = false, -+ bool IsTeamsReduction = false, -+ bool HasDistribute = false); -+ - // TODO: provide atomic and non-atomic reduction generators for reduction - // operators defined by the OpenMP specification. - -@@ -1344,7 +1373,9 @@ - InsertPointTy createReductions(const LocationDescription &Loc, - InsertPointTy AllocaIP, - ArrayRef ReductionInfos, -- ArrayRef IsByRef, bool IsNoWait = false); -+ ArrayRef IsByRef, bool IsNoWait = false, -+ bool IsTeamsReduction = false, -+ bool HasDistribute = false); ++ /// Generator for the control flow structure of an OpenMP canonical loop. ++ /// ++ /// Instead of a logical iteration space, this allows specifying user-defined ++ /// loop counter values using increment, upper- and lower bounds. To ++ /// disambiguate the terminology when counting downwards, instead of lower ++ /// bounds we use \p Start for the loop counter value in the first body ++ /// ++ /// It calls \see calculateCanonicalLoopTripCount for trip count calculations, ++ /// so limitations of that method apply here as well. ++ /// + /// \param Loc The insert and source location description. 
+ /// \param BodyGenCB Callback that will generate the loop body code. + /// \param Start Value of the loop counter for the first iterations. +@@ -1878,8 +1903,6 @@ + /// nowait. + /// \param IsTeamsReduction Optional flag set if it is a teams + /// reduction. +- /// \param HasDistribute Optional flag set if it is a +- /// distribute reduction. + /// \param GridValue Optional GPU grid value. + /// \param ReductionBufNum Optional OpenMPCUDAReductionBufNumValue to be + /// used for teams reduction. +@@ -1888,7 +1911,6 @@ + const LocationDescription &Loc, InsertPointTy AllocaIP, + InsertPointTy CodeGenIP, ArrayRef ReductionInfos, + bool IsNoWait = false, bool IsTeamsReduction = false, +- bool HasDistribute = false, + ReductionGenCBKind ReductionGenCBKind = ReductionGenCBKind::MLIR, + std::optional GridValue = {}, unsigned ReductionBufNum = 1024, + Value *SrcLocInfo = nullptr); +@@ -1960,7 +1982,8 @@ + InsertPointTy AllocaIP, + ArrayRef ReductionInfos, + ArrayRef IsByRef, +- bool IsNoWait = false); ++ bool IsNoWait = false, ++ bool IsTeamsReduction = false); ///} -@@ -1627,6 +1658,31 @@ +@@ -2234,6 +2257,31 @@ MapNamesArray(MapNamesArray) {} }; @@ -10199,9 +10661,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/include/llvm/Frontend/OpenM + /// populate associated static structures. + struct TargetKernelDefaultBounds { + int32_t MinTeams = 1; -+ int32_t MaxTeams = -1; ++ SmallVector MaxTeams; + int32_t MinThreads = 1; -+ int32_t MaxThreads = -1; ++ SmallVector MaxThreads; + int32_t ReductionDataSize = 0; + int32_t ReductionBufferLength = 0; + }; @@ -10212,32 +10674,33 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/include/llvm/Frontend/OpenM + /// prior to the call to the kernel launch OpenMP RTL function. + struct TargetKernelRuntimeBounds { + Value *LoopTripCount = nullptr; -+ Value *TargetThreadLimit = nullptr; -+ Value *TeamsThreadLimit = nullptr; ++ SmallVector TargetThreadLimit; ++ SmallVector TeamsThreadLimit; + Value *MinTeams = nullptr; -+ Value *MaxTeams = nullptr; ++ SmallVector MaxTeams; + Value *MaxThreads = nullptr; + }; + /// Data structure that contains the needed information to construct the /// kernel args vector. struct TargetKernelArgs { -@@ -1635,7 +1691,7 @@ +@@ -2242,7 +2290,7 @@ /// Arguments passed to the runtime library TargetDataRTArgs RTArgs; /// The number of iterations -- Value *NumIterations; -+ Value *TripCount; +- Value *NumIterations = nullptr; ++ Value *TripCount = nullptr; /// The number of teams. - Value *NumTeams; + ArrayRef NumTeams; /// The number of threads. -@@ -1647,12 +1703,11 @@ - - /// Constructor for TargetKernelArgs +@@ -2255,13 +2303,12 @@ + // Constructors for TargetKernelArgs. + TargetKernelArgs() {} TargetKernelArgs(unsigned NumTargetItems, TargetDataRTArgs RTArgs, -- Value *NumIterations, Value *NumTeams, Value *NumThreads, -+ Value *TripCount, Value *NumTeams, Value *NumThreads, - Value *DynCGGroupMem, bool HasNoWait) +- Value *NumIterations, ArrayRef NumTeams, ++ Value *TripCount, ArrayRef NumTeams, + ArrayRef NumThreads, Value *DynCGGroupMem, + bool HasNoWait) - : NumTargetItems(NumTargetItems), RTArgs(RTArgs), - NumIterations(NumIterations), NumTeams(NumTeams), - NumThreads(NumThreads), DynCGGroupMem(DynCGGroupMem), @@ -10248,7 +10711,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/include/llvm/Frontend/OpenM }; /// Create the kernel args vector used by emitTargetKernel. 
This function -@@ -1967,6 +2022,14 @@ +@@ -2625,6 +2672,14 @@ Value *NumTeamsLower = nullptr, Value *NumTeamsUpper = nullptr, Value *ThreadLimit = nullptr, Value *IfExpr = nullptr); @@ -10257,13 +10720,13 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/include/llvm/Frontend/OpenM + /// \param Loc The location where the teams construct was encountered. + /// \param AllocaIP The insertion points to be used for alloca instructions. + /// \param BodyGenCB Callback that will generate the region code. -+ InsertPointTy createDistribute(const LocationDescription &Loc, -+ InsertPointTy AllocaIP, -+ BodyGenCallbackTy BodyGenCB); ++ InsertPointOrErrorTy createDistribute(const LocationDescription &Loc, ++ InsertPointTy AllocaIP, ++ BodyGenCallbackTy BodyGenCB); /// Generate conditional branch and relevant BasicBlocks through which private /// threads copy the 'copyin' variables from Master copy to threadprivate /// copies. -@@ -2079,15 +2142,10 @@ +@@ -2737,15 +2792,10 @@ /// /// \param Loc The insert and source location description. /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not. @@ -10283,17 +10746,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/include/llvm/Frontend/OpenM /// Create a runtime call for kmpc_target_deinit /// -@@ -2113,6 +2171,9 @@ - static void writeThreadBoundsForKernel(const Triple &T, Function &Kernel, - int32_t LB, int32_t UB); - -+ /// Write the global variable to indicate which amdgcn ABI to use -+ static void emit__oclc_ABI_version(Module &M, int32_t COV); -+ - /// Read/write a bounds on teams for \p Kernel. Read will return 0 if none - /// is set. - static std::pair readTeamBoundsForKernel(const Triple &T, -@@ -2187,7 +2248,6 @@ +@@ -2846,7 +2896,6 @@ Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName); @@ -10301,54 +10754,51 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/include/llvm/Frontend/OpenM /// Type of BodyGen to use for region codegen /// /// Priv: If device pointer privatization is required, emit the body of the -@@ -2246,21 +2306,23 @@ +@@ -2905,27 +2954,31 @@ /// Generator for '#omp target' /// /// \param Loc where the target data construct was encountered. + /// \param IsSPMD whether this is an SPMD target launch. + /// \param IsOffloadEntry whether it is an offload entry. ++ /// \param IfCond value of the IF clause for the TARGET construct or nullptr. /// \param CodeGenIP The insertion point where the call to the outlined /// function should be emitted. /// \param EntryInfo The entry information about the function. - /// \param NumTeams Number of teams specified in the num_teams clause. - /// \param NumThreads Number of teams specified in the thread_limit clause. -+ /// \param DefaultBounds The default kernel lanuch bounds. -+ /// \param RuntimeBounds The runtime kernel lanuch bounds. - /// \param Inputs The input values to the region that will be passed. - /// as arguments to the outlined function. - /// \param BodyGenCB Callback that will generate the region code. 
- /// \param ArgAccessorFuncCB Callback that will generate accessors - /// instructions for passed in target arguments where neccessary -- InsertPointTy createTarget(const LocationDescription &Loc, -+ InsertPointTy createTarget(const LocationDescription &Loc, bool IsSPMD, - OpenMPIRBuilder::InsertPointTy AllocaIP, - OpenMPIRBuilder::InsertPointTy CodeGenIP, -- TargetRegionEntryInfo &EntryInfo, int32_t NumTeams, -- int32_t NumThreads, -+ TargetRegionEntryInfo &EntryInfo, -+ const TargetKernelDefaultBounds &DefaultBounds, -+ const TargetKernelRuntimeBounds &RuntimeBounds, - SmallVectorImpl &Inputs, - GenMapInfoCallbackTy GenMapInfoCB, - TargetBodyGenCallbackTy BodyGenCB, -diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp ---- llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp 2024-06-12 10:43:13.320203135 -0500 -+++ llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp 2024-06-12 10:44:20.399498178 -0500 -@@ -145,10 +145,22 @@ - } - #endif - -+Function *GLOBAL_ReductionFunc = nullptr; -+ -+static uint64_t getTypeSizeInBytes(Module &M, Type *Type) { -+ return divideCeil(M.getDataLayout().getTypeSizeInBits(Type), 8); -+} -+ -+static Value *getTypeSizeInBytesValue(IRBuilder<> &Builder, Module &M, -+ Type *Type) { -+ return Builder.getInt64(getTypeSizeInBytes(M, Type)); -+} -+ - static const omp::GV &getGridValue(const Triple &T, Function *Kernel) { ++ /// \param DefaultBounds The default kernel launch bounds. ++ /// \param RuntimeBounds The runtime kernel launch bounds. + /// \param Inputs The input values to the region that will be passed. + /// as arguments to the outlined function. + /// \param BodyGenCB Callback that will generate the region code. + /// \param ArgAccessorFuncCB Callback that will generate accessors +- /// instructions for passed in target arguments where neccessary ++ /// instructions for passed in target arguments where necessary. + /// \param Dependencies A vector of DependData objects that carry +- // dependency information as passed in the depend clause ++ // dependency information as passed in the depend clause. + // \param HasNowait Whether the target construct has a `nowait` clause or not. 
+ InsertPointOrErrorTy createTarget( +- const LocationDescription &Loc, bool IsOffloadEntry, +- OpenMPIRBuilder::InsertPointTy AllocaIP, ++ const LocationDescription &Loc, bool IsSPMD, bool IsOffloadEntry, ++ Value *IfCond, OpenMPIRBuilder::InsertPointTy AllocaIP, + OpenMPIRBuilder::InsertPointTy CodeGenIP, +- TargetRegionEntryInfo &EntryInfo, ArrayRef NumTeams, +- ArrayRef NumThreads, SmallVectorImpl &Inputs, +- GenMapInfoCallbackTy GenMapInfoCB, TargetBodyGenCallbackTy BodyGenCB, ++ TargetRegionEntryInfo &EntryInfo, ++ const TargetKernelDefaultBounds &DefaultBounds, ++ const TargetKernelRuntimeBounds &RuntimeBounds, ++ SmallVectorImpl &Inputs, GenMapInfoCallbackTy GenMapInfoCB, ++ TargetBodyGenCallbackTy BodyGenCB, + TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB, + SmallVector Dependencies = {}, bool HasNowait = false); + +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp llvm-project-aso/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +--- llvm-project-aso-orig/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp 2024-11-23 20:25:26.999274603 -0600 ++++ llvm-project-aso/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp 2024-11-23 20:40:49.111951908 -0600 +@@ -153,6 +153,7 @@ if (T.isAMDGPU()) { StringRef Features = Kernel->getFnAttribute("target-features").getValueAsString(); @@ -10356,16 +10806,16 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu if (Features.count("+wavefrontsize64")) return omp::getAMDGPUGridValues<64>(); return omp::getAMDGPUGridValues<32>(); -@@ -365,7 +377,7 @@ +@@ -369,7 +370,7 @@ Builder.restoreIP(OuterAllocaIP); Instruction *FakeVal; AllocaInst *FakeValAddr = - Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, Name + ".addr"); + Builder.CreateAlloca(Builder.getInt32Ty(), 0, nullptr, Name + ".addr"); - ToBeDeleted.push(FakeValAddr); + ToBeDeleted.push_back(FakeValAddr); if (AsPtr) { -@@ -510,7 +522,7 @@ +@@ -525,7 +526,7 @@ KernelArgs.RTArgs.MapTypesArray, KernelArgs.RTArgs.MapNamesArray, KernelArgs.RTArgs.MappersArray, @@ -10374,7 +10824,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu Flags, NumTeams3D, NumThreads3D, -@@ -722,6 +734,8 @@ +@@ -737,6 +738,8 @@ Extractor.excludeArgFromAggregate(V); Function *OutlinedFn = Extractor.extractCodeRegion(CEAC); @@ -10383,7 +10833,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu // Forward target-cpu, target-features attributes to the outlined function. auto TargetCpuAttr = OuterFn->getFnAttribute("target-cpu"); -@@ -1190,7 +1204,7 @@ +@@ -1216,7 +1219,7 @@ static void targetParallelCallback( OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, @@ -10392,7 +10842,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu Value *ThreadID, const SmallVector &ToBeDeleted) { // Add some known attributes. 
IRBuilder<> &Builder = OMPIRBuilder->Builder; -@@ -1207,7 +1221,6 @@ +@@ -1233,7 +1236,6 @@ CallInst *CI = cast(OutlinedFn.user_back()); assert(CI && "Expected call instruction to outlined function"); CI->getParent()->setName("omp_parallel"); @@ -10400,16 +10850,18 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu Builder.SetInsertPoint(CI); Type *PtrTy = OMPIRBuilder->VoidPtr; Value *NullPtrValue = Constant::getNullValue(PtrTy); -@@ -1216,7 +1229,7 @@ +@@ -1241,8 +1243,8 @@ + // Add alloca for kernel args OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP(); Builder.SetInsertPoint(OuterAllocaBB, OuterAllocaBB->getFirstInsertionPt()); - AllocaInst *ArgsAlloca = +- AllocaInst *ArgsAlloca = - Builder.CreateAlloca(ArrayType::get(PtrTy, NumCapturedVars)); -+ Builder.CreateAlloca(ArrayType::get(PtrTy, NumCapturedVars), nullptr, "kernel_arg"); ++ AllocaInst *ArgsAlloca = Builder.CreateAlloca( ++ ArrayType::get(PtrTy, NumCapturedVars), nullptr, "kernel_arg"); Value *Args = ArgsAlloca; // Add address space cast if array for storing arguments is not allocated // in address space 0 -@@ -1278,7 +1291,7 @@ +@@ -1304,7 +1306,7 @@ static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, @@ -10418,7 +10870,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu const SmallVector &ToBeDeleted) { IRBuilder<> &Builder = OMPIRBuilder->Builder; FunctionCallee RTLFn; -@@ -1471,7 +1484,9 @@ +@@ -1498,7 +1500,9 @@ AllocaInst *PrivTIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr.local"); @@ -10429,7 +10881,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu // Add some fake uses for OpenMP provided arguments. 
ToBeDeleted.push_back(Builder.CreateLoad(Int32, TIDAddr, "tid.addr.use")); -@@ -1509,7 +1524,7 @@ +@@ -1537,7 +1541,7 @@ OI.PostOutlineCB = [=, ToBeDeletedVec = std::move(ToBeDeleted)](Function &OutlinedFn) { targetParallelCallback(this, OutlinedFn, OuterFn, OuterAllocaBlock, Ident, @@ -10438,7 +10890,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu ThreadID, ToBeDeletedVec); }; } else { -@@ -1517,7 +1532,7 @@ +@@ -1545,7 +1549,7 @@ OI.PostOutlineCB = [=, ToBeDeletedVec = std::move(ToBeDeleted)](Function &OutlinedFn) { hostParallelCallback(this, OutlinedFn, OuterFn, Ident, IfCondition, @@ -10447,963 +10899,81 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu }; } -@@ -2134,39 +2149,1191 @@ - /*IsCancellable*/ true); - } - -+static Value *getGPUWarpSize(Module &M, OpenMPIRBuilder &OMPBuilder) { -+ return OMPBuilder.Builder.CreateCall( -+ OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_get_warp_size), -+ {}); -+} -+ -+static Value *getGPUThreadID(Module &M, OpenMPIRBuilder &OMPBuilder) { -+ return OMPBuilder.Builder.CreateCall( -+ OMPBuilder.getOrCreateRuntimeFunction( -+ M, OMPRTL___kmpc_get_hardware_thread_id_in_block), -+ {}); -+} -+ -+static Value *getGPUNumThreads(Module &M, OpenMPIRBuilder &OMPBuilder) { -+ const char *LocSize = "__kmpc_get_hardware_num_threads_in_block"; -+ llvm::Function *F = M.getFunction(LocSize); -+ if (!F) { -+ LLVMContext &Ctx = M.getContext(); -+ Type *I32Type = Type::getInt32Ty(Ctx); -+ -+ F = Function::Create( -+ FunctionType::get(I32Type, std::nullopt, false), -+ GlobalVariable::ExternalLinkage, LocSize, M); -+ } -+ return OMPBuilder.Builder.CreateCall(F, std::nullopt, "nvptx_num_threads"); -+} -+ -+static Value *getNVPTXWarpID(Module &M, OpenMPIRBuilder &OMPIRBuilder) { -+ unsigned LaneIDBits = -+ llvm::Log2_32(OMPIRBuilder.Config.getGridValue().GV_Warp_Size); -+ return OMPIRBuilder.Builder.CreateAShr(getGPUThreadID(M, OMPIRBuilder), -+ LaneIDBits, "nvptx_warp_id"); -+} -+ -+static Value *getNVPTXLaneID(Module &M, OpenMPIRBuilder &OMPIRBuilder) { -+ unsigned LaneIDBits = -+ llvm::Log2_32(OMPIRBuilder.Config.getGridValue().GV_Warp_Size); -+ assert(LaneIDBits < 32 && "Invalid LaneIDBits size in NVPTX device."); -+ unsigned LaneIDMask = ~0u >> (32u - LaneIDBits); -+ return OMPIRBuilder.Builder.CreateAnd( -+ getGPUThreadID(M, OMPIRBuilder), -+ OMPIRBuilder.Builder.getInt32(LaneIDMask), "nvptx_lane_id"); -+} -+ -+namespace { -+enum CopyAction : unsigned { -+ // RemoteLaneToThread: Copy over a Reduce list from a remote lane in -+ // the warp using shuffle instructions. -+ RemoteLaneToThread, -+ // ThreadCopy: Make a copy of a Reduce list on the thread's stack. 
-+ ThreadCopy, -+}; -+} // namespace -+ -+struct CopyOptionsTy { -+ llvm::Value *RemoteLaneOffset; -+ llvm::Value *ScratchpadIndex; -+ llvm::Value *ScratchpadWidth; -+}; -+ -+static Value *castValueToType(Module &M, OpenMPIRBuilder &OMPBuilder, -+ Value *From, Type *ToType, -+ OpenMPIRBuilder::InsertPointTy AllocaIP, -+ const OpenMPIRBuilder::LocationDescription &Loc) { -+ IRBuilder<> &Builder = OMPBuilder.Builder; -+ Type *FromType = From->getType(); -+ uint64_t FromSize = -+ divideCeil(M.getDataLayout().getTypeSizeInBits(FromType), 8); -+ uint64_t ToSize = -+ divideCeil(M.getDataLayout().getTypeSizeInBits(ToType), 8); -+ assert(FromSize > 0 && "From size must be greater than zero"); -+ assert(ToSize > 0 && "From size must be greater than zero"); -+ if(FromType == ToType) -+ return From; -+ if(FromSize == ToSize) -+ return Builder.CreateBitCast(From, ToType); -+ if (ToType->isIntegerTy() && FromType->isIntegerTy()) -+ // FIXME(JAN): Assuming signed integer here, not sure how to find out -+ // if unsigned -+ return Builder.CreateIntCast(From, ToType, /*isSigned*/true); -+ OpenMPIRBuilder::InsertPointTy CurIP = Builder.saveIP(); -+ Builder.restoreIP(AllocaIP); -+ Value *CastItem = Builder.CreateAlloca(ToType, nullptr, "cast_tmp"); -+ Builder.restoreIP(CurIP); -+ -+ Value *ValCastItem = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ CastItem, FromType->getPointerTo(), "valcastitem"); -+ Builder.CreateStore(From, ValCastItem); -+ return Builder.CreateLoad(ToType, CastItem, "castitemload"); -+} -+ -+static Value * -+createRuntimeShuffleFunction(Module &M, OpenMPIRBuilder &OMPBuilder, -+ const OpenMPIRBuilder::LocationDescription &Loc, -+ OpenMPIRBuilder::InsertPointTy AllocaIP, -+ Value *Element, Type *ElementType, Value *Offset) { -+ LLVMContext &Ctx = M.getContext(); -+ IRBuilder<> &Builder = OMPBuilder.Builder; -+ uint64_t Size = -+ divideCeil(M.getDataLayout().getTypeSizeInBits(ElementType), 8); -+ assert(Size <= 8 && "Unsupported bitwidth in shuffle instruction"); -+ Function *ShuffleFunc = OMPBuilder.getOrCreateRuntimeFunctionPtr( -+ Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32 -+ : RuntimeFunction::OMPRTL___kmpc_shuffle_int64); -+ Type *IntType = Builder.getIntNTy(Size <= 4 ? 
32 : 64); -+ Value *ElemCast = Builder.CreateCast(Instruction::SExt, Element, IntType); -+ Value *WarpSize = getGPUWarpSize(M, OMPBuilder); -+ Value *WarpSizeCast = -+ Builder.CreateIntCast(WarpSize, Type::getInt16Ty(Ctx), /*isSigned=*/true); -+ Value *ShuffleCall = -+ Builder.CreateCall(ShuffleFunc, {ElemCast, Offset, WarpSizeCast}); -+ return castValueToType(M, OMPBuilder, ShuffleCall, IntType, AllocaIP, Loc); -+} -+ -+static void shuffleAndStore(Value *SrcAddr, Value *DstAddr, Type *ElementType, -+ llvm::Value *Offset, Type* ReductionArrayTy, -+ const OpenMPIRBuilder::LocationDescription &Loc, -+ Module &M, -+ OpenMPIRBuilder &OMPBuilder, -+ OpenMPIRBuilder::InsertPointTy AllocaIP) { -+ LLVMContext &Ctx = M.getContext(); -+ IRBuilder<> &Builder = OMPBuilder.Builder; -+ uint64_t Size = -+ divideCeil(M.getDataLayout().getTypeSizeInBits(ElementType), 8); -+ Type *PtrTy = PointerType::getUnqual(Ctx); -+ Value *ElemPtr = DstAddr; -+ Value *Ptr = SrcAddr; -+ // Value *PtrEnd = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ //Builder.CreateConstGEP1_64(ReductionArrayTy, SrcAddr, 1), PtrTy); -+ for (int IntSize = 8; IntSize >= 1; IntSize /= 2) { -+ if(Size < IntSize) -+ continue; -+ // FIXME(JAN): Check if there is a function to convert from bytes to bits -+ Type *IntTy = Builder.getIntNTy(IntSize*8); -+ Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ Ptr, IntTy->getPointerTo(), "ptrcast"); -+ ElemPtr = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ ElemPtr, IntTy->getPointerTo(), "elemptrcast"); -+ -+ // FIXME(JAN): Implement loop to handle larger size -+ assert(((Size / IntSize) <= 1) && "Unsupported IntSize"); -+ Value *Val = Builder.CreateLoad(IntTy, Ptr); -+ Value *Res = createRuntimeShuffleFunction(M, OMPBuilder, Loc, AllocaIP, Val, -+ IntTy, Offset); -+ Builder.CreateStore(Res, ElemPtr); -+ Ptr = Builder.CreateConstGEP1_64(ReductionArrayTy, Ptr, 1, "ptrgep"); -+ ElemPtr = -+ Builder.CreateConstGEP1_64(ReductionArrayTy, ElemPtr, 1, "elemptrgep"); -+ Size = Size % IntSize; -+ } -+} -+ -+static void -+emitReductionListCopy(CopyAction Action, Type *ReductionArrayTy, -+ ArrayRef ReductionInfos, -+ Value *SrcBase, Value *DestBase, -+ Module &M, OpenMPIRBuilder &OMPBuilder, -+ const OpenMPIRBuilder::LocationDescription &Loc, -+ OpenMPIRBuilder::InsertPointTy AllocaIP, -+ CopyOptionsTy CopyOptions = {nullptr, nullptr, nullptr}) { -+ LLVMContext &Ctx = M.getContext(); -+ IRBuilder<> &Builder = OMPBuilder.Builder; -+ Type *PtrTy = PointerType::getUnqual(Ctx); -+ -+ Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset; -+ -+ for (auto En : enumerate(ReductionInfos)) { -+ const OpenMPIRBuilder::ReductionInfo &RI = En.value(); -+ Value *SrcElementAddr = nullptr; -+ Value *DestElementAddr = nullptr; -+ Value *DestElementPtrAddr = nullptr; -+ bool ShuffleInElement = false; -+ bool UpdateDestListPtr = false; -+ -+ // Step 1.1: Get the address for the src element in the Reduce list. -+ Value *SrcElementPtrAddr = Builder.CreateConstGEP2_64( -+ ReductionArrayTy, SrcBase, 0, En.index(), "srcelementptraddr"); -+ SrcElementAddr = -+ Builder.CreateLoad(PtrTy, SrcElementPtrAddr, "srcelementaddr"); -+ -+ // Step 1.2: Create a temporary to store the element in the destination -+ // Reduce list. 
-+ DestElementPtrAddr = Builder.CreateInBoundsGEP( -+ ReductionArrayTy, DestBase, -+ {Builder.getInt64(0), Builder.getInt64(En.index())}, -+ "destelementptraddr"); -+ switch (Action) { -+ case RemoteLaneToThread: { -+ OpenMPIRBuilder::InsertPointTy CurIP = Builder.saveIP(); -+ Builder.restoreIP(AllocaIP); -+ DestElementAddr = Builder.CreateAlloca(RI.ElementType, nullptr, -+ ".omp.reduction.element"); -+ Builder.restoreIP(CurIP); -+ ShuffleInElement = true; -+ UpdateDestListPtr = true; -+ break; -+ } -+ case ThreadCopy: { -+ DestElementAddr = -+ Builder.CreateLoad(PtrTy, DestElementPtrAddr, "destelementaddr"); -+ break; -+ } -+ } -+ -+ // FIXME(JAN): Original code in clanguses .withElementType(...) -+ // check if this generates any code -+ -+ if (ShuffleInElement) { -+ shuffleAndStore(SrcElementAddr, DestElementAddr, -+ RI.ElementType, RemoteLaneOffset, -+ ReductionArrayTy, Loc, M, -+ OMPBuilder, AllocaIP); -+ } else { -+ // FIXME(JAN): Assume Scalar here (TEK_Scalar in Clang) -+ Value *Elem = Builder.CreateLoad(RI.ElementType, SrcElementAddr); -+ Builder.CreateStore(Elem, DestElementAddr); -+ } -+ // Step 3.1: Modify reference in dest Reduce list as needed. -+ // Modifying the reference in Reduce list to point to the newly -+ // created element. The element is live in the current function -+ // scope and that of functions it invokes (i.e., reduce_function). -+ // RemoteReduceData[i] = (void*)&RemoteElem -+ if (UpdateDestListPtr) { -+ Value *CastDestAddr = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ DestElementAddr, PtrTy, "castdestaddr"); -+ Builder.CreateStore(CastDestAddr, DestElementPtrAddr); -+ } -+ } -+} -+ -+static OpenMPIRBuilder::InsertPointTy getIPAfterInstr(Instruction *I) { -+ BasicBlock::iterator it(I); -+ it++; -+ return OpenMPIRBuilder::InsertPointTy(I->getParent(), it); -+} -+ -+ -+static Function *emitShuffleAndReduceFunction( -+ Module &M, const OpenMPIRBuilder::LocationDescription &Loc, -+ ArrayRef ReductionInfos, -+ Function* ReduceFn, -+ OpenMPIRBuilder &OMPBuilder) { -+ IRBuilder<> &Builder = OMPBuilder.Builder; -+ -+ LLVMContext &Ctx = M.getContext(); -+ Type *VoidTy = Type::getVoidTy(Ctx); -+ Type *PtrTy = PointerType::getUnqual(Ctx); -+ Type *I16Type = Type::getInt16Ty(Ctx); -+ auto FuncTy = FunctionType::get( -+ VoidTy, {PtrTy, I16Type, I16Type, I16Type}, /* IsVarArg */ false); -+ Function *SarFunc = -+ Function::Create(FuncTy, GlobalVariable::InternalLinkage, -+ "_omp_reduction_shuffle_and_reduce_func", &M); -+ SarFunc->setDoesNotRecurse(); -+ -+ // Set arg names -+ Argument *Arg0 = SarFunc->getArg(0); -+ Argument *Arg1 = SarFunc->getArg(1); -+ Argument *Arg2 = SarFunc->getArg(2); -+ Argument *Arg3 = SarFunc->getArg(3); -+ Arg0->setName("reduce_list_arg"); -+ Arg1->setName("lane_id_arg"); -+ Arg2->setName("remote_lane_offset_arg"); -+ Arg3->setName("algo_ver_arg"); -+ -+ BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "", SarFunc); -+ Builder.SetInsertPoint(EntryBlock); -+ -+ Type *Arg0Type = Arg0->getType(); -+ Type *ArgNType = Arg1->getType(); -+ Type *ArgNPtrType = Arg1->getType()->getPointerTo(); -+ Value *ReduceListAlloca = -+ Builder.CreateAlloca(Arg0Type, nullptr, Arg0->getName() + ".addr"); -+ Value *LaneIdAlloca = -+ Builder.CreateAlloca(ArgNType, nullptr, Arg1->getName() + ".addr"); -+ Value *RemoteLaneOffsetAlloca = -+ Builder.CreateAlloca(ArgNType, nullptr, Arg2->getName() + ".addr"); -+ Value *AlgoVerAlloca = -+ Builder.CreateAlloca(ArgNType, nullptr, Arg3->getName() + ".addr"); -+ // FIXME(Jan): Compute reduction list array type -+ auto 
*RedListArrayTy = ArrayType::get(PtrTy, 1); -+ Instruction *RemoteReductionListAlloca = Builder.CreateAlloca( -+ RedListArrayTy, nullptr, ".omp.reduction.remote_reduce_list"); -+ -+ Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ ReduceListAlloca, Arg0Type, ReduceListAlloca->getName() + ".acast"); -+ Value *LaneIdAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ LaneIdAlloca, ArgNPtrType, LaneIdAlloca->getName() + ".acast"); -+ Value *RemoteLaneOffsetAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ RemoteLaneOffsetAlloca, ArgNPtrType, -+ RemoteLaneOffsetAlloca->getName() + ".acast"); -+ Value *AlgoVerAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ AlgoVerAlloca, ArgNPtrType, AlgoVerAlloca->getName() + ".acast"); -+ Value *RemoteListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ RemoteReductionListAlloca, PtrTy, -+ RemoteReductionListAlloca->getName() + ".acast"); -+ -+ Builder.CreateStore(Arg0, ReduceListAddrCast); -+ Builder.CreateStore(Arg1, LaneIdAddrCast); -+ Builder.CreateStore(Arg2, RemoteLaneOffsetAddrCast); -+ Builder.CreateStore(Arg3, AlgoVerAddrCast); -+ -+ Value *ReduceList = -+ Builder.CreateLoad(Arg0Type, ReduceListAddrCast, "reduce_list"); -+ Value *LaneId = Builder.CreateLoad(ArgNType, LaneIdAddrCast, "lane_id"); -+ Value *RemoteLaneOffset = Builder.CreateLoad( -+ ArgNType, RemoteLaneOffsetAddrCast, "remote_lane_offset"); -+ Value *AlgoVer = Builder.CreateLoad(ArgNType, AlgoVerAddrCast, "algo_ver"); -+ -+ OpenMPIRBuilder::InsertPointTy AllocaIP = -+ getIPAfterInstr(RemoteReductionListAlloca); -+ emitReductionListCopy(RemoteLaneToThread, RedListArrayTy, ReductionInfos, -+ ReduceList, RemoteListAddrCast, M, OMPBuilder, -+ Loc, AllocaIP, {RemoteLaneOffset, nullptr, nullptr}); -+ -+ // The actions to be performed on the Remote Reduce list is dependent -+ // on the algorithm version. -+ // -+ // if (AlgoVer==0) || (AlgoVer==1 && (LaneId < Offset)) || (AlgoVer==2 && -+ // LaneId % 2 == 0 && Offset > 0): -+ // do the reduction value aggregation -+ // -+ // The thread local variable Reduce list is mutated in place to host the -+ // reduced data, which is the aggregated value produced from local and -+ // remote lanes. -+ // -+ // Note that AlgoVer is expected to be a constant integer known at compile -+ // time. -+ // When AlgoVer==0, the first conjunction evaluates to true, making -+ // the entire predicate true during compile time. -+ // When AlgoVer==1, the second conjunction has only the second part to be -+ // evaluated during runtime. Other conjunctions evaluates to false -+ // during compile time. -+ // When AlgoVer==2, the third conjunction has only the second part to be -+ // evaluated during runtime. Other conjunctions evaluates to false -+ // during compile time. 
-+ Value *CondAlgo0 = Builder.CreateIsNull(AlgoVer); -+ Value *Algo1 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(1)); -+ Value *LaneComp = Builder.CreateICmpULT(LaneId, RemoteLaneOffset); -+ Value *CondAlgo1 = Builder.CreateAnd(Algo1, LaneComp); -+ Value *Algo2 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(2)); -+ Value *LaneIdAnd1 = Builder.CreateAnd(LaneId, Builder.getInt16(1)); -+ Value *LaneIdComp = Builder.CreateIsNull(LaneIdAnd1); -+ Value *Algo2AndLaneIdComp = Builder.CreateAnd(Algo2, LaneIdComp); -+ Value *RemoteOffsetComp = -+ Builder.CreateICmpSGT(RemoteLaneOffset, Builder.getInt16(0)); -+ Value *CondAlgo2 = Builder.CreateAnd(Algo2AndLaneIdComp, RemoteOffsetComp); -+ Value *CA0OrCA1 = Builder.CreateOr(CondAlgo0, CondAlgo1); -+ Value *CondReduce = Builder.CreateOr(CA0OrCA1, CondAlgo2); -+ -+ -+ BasicBlock *ThenBB = BasicBlock::Create(Ctx, "then", SarFunc); -+ BasicBlock *ElseBB = BasicBlock::Create(Ctx, "else", SarFunc); -+ BasicBlock *MergeBB = BasicBlock::Create(Ctx, "ifcont", SarFunc); -+ -+ Builder.CreateCondBr(CondReduce, ThenBB, ElseBB); -+ Builder.SetInsertPoint(ThenBB); -+ // reduce_function(LocalReduceList, RemoteReduceList) -+ Value *LocalReduceListPtr = -+ Builder.CreatePointerBitCastOrAddrSpaceCast(ReduceList, PtrTy); -+ Value *RemoteReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ RemoteListAddrCast, PtrTy); -+ Builder.CreateCall(ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr}); -+ Builder.CreateBr(MergeBB); -+ Builder.SetInsertPoint(ElseBB); -+ Builder.CreateBr(MergeBB); -+ Builder.SetInsertPoint(MergeBB); -+ -+ Value *Algo1_2 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(1)); -+ Value *LaneIdGtOffset = Builder.CreateICmpUGE(LaneId, RemoteLaneOffset); -+ Value *CondCopy = Builder.CreateAnd(Algo1_2, LaneIdGtOffset); -+ -+ BasicBlock *CpyThenBB = BasicBlock::Create(Ctx, "cpy_then", SarFunc); -+ BasicBlock *CpyElseBB = BasicBlock::Create(Ctx, "cpy_else", SarFunc); -+ BasicBlock *CpyMergeBB = BasicBlock::Create(Ctx, "cpy_ifcont", SarFunc); -+ -+ Builder.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB); -+ -+ Builder.SetInsertPoint(CpyThenBB); -+ emitReductionListCopy(ThreadCopy, RedListArrayTy, ReductionInfos, -+ RemoteListAddrCast, ReduceList, M, OMPBuilder, -+ Loc, AllocaIP); -+ Builder.CreateBr(CpyMergeBB); -+ Builder.SetInsertPoint(CpyElseBB); -+ Builder.CreateBr(CpyMergeBB); -+ Builder.SetInsertPoint(CpyMergeBB); -+ Builder.CreateRetVoid(); -+ -+ return SarFunc; -+} -+ -+static Function *emitInterWarpCopyFunction( -+ Module &M, const OpenMPIRBuilder::LocationDescription &Loc, -+ ArrayRef ReductionInfos, -+ OpenMPIRBuilder &OMPBuilder) { -+ IRBuilder<> &Builder = OMPBuilder.Builder; -+ OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP(); -+ LLVMContext &Ctx = M.getContext(); -+ Type *VoidTy = Type::getVoidTy(Ctx); -+ Type *PtrTy = PointerType::getUnqual(Ctx); -+ Type *I32Type = Type::getInt32Ty(Ctx); -+ auto FuncTy = -+ FunctionType::get(VoidTy, {PtrTy, I32Type}, /* IsVarArg */ false); -+ Function *WcFunc = -+ Function::Create(FuncTy, GlobalVariable::InternalLinkage, -+ "_omp_reduction_inter_warp_copy_func", &M); -+ WcFunc->setDoesNotRecurse(); -+ -+ // Set arg names -+ Argument *Arg0 = WcFunc->getArg(0); -+ Argument *Arg1 = WcFunc->getArg(1); -+ Arg0->setName("reduce_list"); -+ Arg1->setName("num_warps"); -+ -+ // Ensure data transfer storage -+ unsigned WarpSize = OMPBuilder.Config.getGridValue().GV_Warp_Size; -+ // FIXME(Jan): Not sure about the array type here, but it is I32 in Clang -+ auto *ArrayTy = ArrayType::get(I32Type, 
WarpSize); -+ StringRef TransferMediumName = -+ "__openmp_nvptx_data_transfer_temporary_storage"; -+ GlobalVariable *TransferMedium = M.getGlobalVariable(TransferMediumName); -+ if (!TransferMedium) { -+ unsigned SharedAddressSpace = -+ 3; /* FIXME(Jan): C.getTargetAddressSpace(LangAS::cuda_shared); */ -+ TransferMedium = new GlobalVariable( -+ M, ArrayTy, /*isConstant=*/false, GlobalVariable::WeakAnyLinkage, -+ UndefValue::get(ArrayTy), TransferMediumName, -+ /*InsertBefore=*/nullptr, GlobalVariable::NotThreadLocal, -+ SharedAddressSpace); -+ } -+ -+ BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "", WcFunc); -+ Builder.SetInsertPoint(EntryBlock); -+ -+ Type *Arg0Type = Arg0->getType(); -+ Type *Arg1Type = Arg1->getType(); -+ Value *ReduceListAlloca = -+ Builder.CreateAlloca(Arg0Type, nullptr, Arg0->getName() + ".addr"); -+ Instruction *NumWarpsAlloca = -+ Builder.CreateAlloca(Arg1Type, nullptr, Arg1->getName() + ".addr"); -+ Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ ReduceListAlloca, Arg0Type, ReduceListAlloca->getName() + ".acast"); -+ Value *NumWarpsAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ NumWarpsAlloca, Arg1Type->getPointerTo(), -+ NumWarpsAlloca->getName() + ".acast"); -+ Builder.CreateStore(Arg0, ReduceListAddrCast); -+ Builder.CreateStore(Arg1, NumWarpsAddrCast); -+ -+ // Get GPU Info -+ Value *ThreadID = getGPUThreadID(M, OMPBuilder); -+ Value *LaneID = getNVPTXLaneID(M, OMPBuilder); -+ Value *WarpID = getNVPTXWarpID(M, OMPBuilder); -+ -+ Value *ReduceListArg = -+ Builder.CreateLoad(PtrTy, ReduceListAddrCast, "reduce_list_arg"); -+ -+ for (auto En : enumerate(ReductionInfos)) { -+ const OpenMPIRBuilder::ReductionInfo &RI = En.value(); -+ Type *ElementTy = RI.ElementType; -+ unsigned NumTypeBits = M.getDataLayout().getTypeSizeInBits(ElementTy); -+ unsigned RealTySize = divideCeil(NumTypeBits, 8); -+ for (unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) { -+ unsigned NumIters = RealTySize/TySize; -+ if (NumIters == 0) -+ continue; -+ // Type *CopyTy = Builder.getIntNTy(TySize); -+ Type *Int32Ty = Builder.getInt32Ty(); -+ Value *Cnt = nullptr; -+ Value *CntAddrAcast = nullptr; -+ BasicBlock *PrecondBB = nullptr; -+ BasicBlock *ExitBB = nullptr; -+ -+ if (NumIters > 1) { -+ OpenMPIRBuilder::InsertPointTy CurrIP = Builder.saveIP(); -+ Builder.SetInsertPoint(NumWarpsAlloca); -+ Value *CntAddr = Builder.CreateAlloca(Int32Ty, nullptr, ".cnt.addr"); -+ Builder.restoreIP(CurrIP); -+ CntAddrAcast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ CntAddr, PtrTy, CntAddr->getName() + ".acast"); -+ Builder.CreateStore(Constant::getNullValue(Int32Ty), CntAddrAcast); -+ PrecondBB = BasicBlock::Create(Ctx, "precond", WcFunc); -+ ExitBB = BasicBlock::Create(Ctx, "exit", WcFunc); -+ BasicBlock *BodyBB = BasicBlock::Create(Ctx, "body", WcFunc); -+ Builder.CreateBr(PrecondBB); -+ Builder.SetInsertPoint(PrecondBB); -+ Cnt = Builder.CreateLoad(Int32Ty, CntAddrAcast, "cnt"); -+ Value *Cmp = Builder.CreateICmpULT(Cnt, Builder.getInt32(NumIters)); -+ Builder.CreateCondBr(Cmp, BodyBB, ExitBB); -+ Builder.SetInsertPoint(BodyBB); -+ } -+ -+ OMPBuilder.createBarrier( -+ OpenMPIRBuilder::LocationDescription(Builder.saveIP(), Loc.DL), -+ omp::Directive::OMPD_unknown, -+ /* ForceSimpleCall */ false, -+ /* CheckCancelFlag */ true); -+ BasicBlock *ThenBB = BasicBlock::Create(Ctx, "then", WcFunc); -+ BasicBlock *ElseBB = BasicBlock::Create(Ctx, "else", WcFunc); -+ BasicBlock *MergeBB = BasicBlock::Create(Ctx, "ifcont", WcFunc); -+ -+ // if 
(lane_id == 0) -+ Value *IsWarpMaster = Builder.CreateIsNull(LaneID, "warp_master"); -+ Builder.CreateCondBr(IsWarpMaster, ThenBB, ElseBB); -+ -+ // then -+ // Reduce element = LocalReduceList[i] -+ Builder.SetInsertPoint(ThenBB); -+ // FIXME(JAN): Should array type be passed in? -+ auto *RedListArrayTy = ArrayType::get(PtrTy, 1); -+ // FIXME(JAN): maybe it should be 0,0 and not use En.index() -+ Value *ReduceListElementPtrPtr = Builder.CreateConstInBoundsGEP2_64( -+ RedListArrayTy, ReduceListArg, 0, En.index()); -+ Value *ReduceListElementPtr = Builder.CreateLoad( -+ PtrTy, ReduceListElementPtrPtr, "reduce_list_element_ptr"); -+ if (NumIters > 1) -+ ReduceListElementPtr = Builder.CreateGEP(Int32Ty, ReduceListElementPtr, Cnt); -+ -+ Value *TransferElemAddr = Builder.CreateInBoundsGEP( -+ ArrayTy, TransferMedium, {Builder.getInt64(0), WarpID}); -+ Value *ReduceListElement = Builder.CreateLoad( -+ I32Type, ReduceListElementPtr, "reduce_list_element"); -+ Builder.CreateStore(ReduceListElement, TransferElemAddr, -+ /*IsVolatile*/ true); -+ Builder.CreateBr(MergeBB); -+ -+ // else -+ Builder.SetInsertPoint(ElseBB); -+ Builder.CreateBr(MergeBB); -+ -+ // endif -+ Builder.SetInsertPoint(MergeBB); -+ OMPBuilder.createBarrier( -+ OpenMPIRBuilder::LocationDescription(Builder.saveIP(), Loc.DL), -+ omp::Directive::OMPD_unknown, -+ /* ForceSimpleCall */ false, -+ /* CheckCancelFlag */ true); -+ -+ // Warp 0 copies reduce element from transfer medium -+ BasicBlock *W0ThenBB = BasicBlock::Create(Ctx, "w0then", WcFunc); -+ BasicBlock *W0ElseBB = BasicBlock::Create(Ctx, "w0else", WcFunc); -+ BasicBlock *W0MergeBB = BasicBlock::Create(Ctx, "w0ifcont", WcFunc); -+ -+ Value *NumWarpsVal = -+ Builder.CreateLoad(I32Type, NumWarpsAddrCast, "num_warps"); -+ Value *IsActiveThread = -+ Builder.CreateICmpULT(ThreadID, NumWarpsVal, "is_active_thread"); -+ Builder.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB); -+ -+ // W0then -+ // SecMEdiumPtr = &medium[tid] -+ Builder.SetInsertPoint(W0ThenBB); -+ Value *SrcMediumPtrVal = Builder.CreateInBoundsGEP( -+ ArrayTy, TransferMedium, {Builder.getInt64(0), ThreadID}); -+ // SrcMediumVal = *SrcMediumPtr -+ // TODO(JAN): Bitcast here, but no load? skipping for now -+ Value *TargetElementPtrPtr = Builder.CreateConstInBoundsGEP2_64( -+ RedListArrayTy, ReduceListArg, 0, En.index()); -+ Value *TargetElementPtr = Builder.CreateLoad(PtrTy, TargetElementPtrPtr); -+ if (NumIters > 1) -+ TargetElementPtr = Builder.CreateGEP(Int32Ty, TargetElementPtr, Cnt); -+ -+ Value *SrcMediumValue = -+ Builder.CreateLoad(I32Type, SrcMediumPtrVal, /*IsVolatile*/ true); -+ Builder.CreateStore(SrcMediumValue, TargetElementPtr); -+ Builder.CreateBr(W0MergeBB); -+ -+ // W0else -+ Builder.SetInsertPoint(W0ElseBB); -+ Builder.CreateBr(W0MergeBB); -+ -+ // W0endif -+ Builder.SetInsertPoint(W0MergeBB); -+ if (NumIters > 1) { -+ Cnt = Builder.CreateNSWAdd(Cnt, Builder.getInt32(1)); -+ Builder.CreateStore(Cnt, CntAddrAcast); -+ Builder.CreateBr(PrecondBB); -+ Builder.SetInsertPoint(ExitBB); -+ } -+ } -+ } -+ -+ Builder.CreateRetVoid(); -+ Builder.restoreIP(OldIP); -+ return WcFunc; -+} -+ -+/// This function emits a helper that copies all the reduction variables from -+/// the team into the provided global buffer for the reduction variables. 
-+/// -+/// void list_to_global_copy_func(void *buffer, int Idx, void *reduce_data) -+/// For all data entries D in reduce_data: -+/// Copy local D to buffer.D[Idx] -+static Function *emitListToGlobalCopyFunction( -+ Module &M, const OpenMPIRBuilder::LocationDescription &Loc, -+ ArrayRef ReductionInfos, -+ OpenMPIRBuilder &OMPBuilder) { -+ IRBuilder<> &Builder = OMPBuilder.Builder; -+ OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP(); -+ LLVMContext &Ctx = M.getContext(); -+ Type *VoidTy = Type::getVoidTy(Ctx); -+ Type *Int32Ty = Builder.getInt32Ty(); -+ Type *PtrTy = PointerType::getUnqual(Ctx); -+ auto FuncTy = -+ FunctionType::get(VoidTy, {PtrTy, Int32Ty, PtrTy}, /* IsVarArg */ false); -+ Function *LtGCFunc = -+ Function::Create(FuncTy, GlobalVariable::InternalLinkage, -+ "_omp_reduction_list_to_global_copy_func", &M); -+ LtGCFunc->setDoesNotRecurse(); -+ -+ BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "", LtGCFunc); -+ Builder.SetInsertPoint(EntryBlock); -+ -+ // Set arg names -+ Argument *Arg0 = LtGCFunc->getArg(0); -+ Argument *Arg1 = LtGCFunc->getArg(1); -+ Argument *Arg2 = LtGCFunc->getArg(2); -+ Arg0->setName("buffer_arg"); -+ Arg1->setName("idx_arg"); -+ Arg2->setName("reduce_list_arg"); -+ -+ Value *BufferArgAlloca = -+ Builder.CreateAlloca(PtrTy, nullptr, Arg0->getName() + ".addr"); -+ Value *IdxArgAlloca = -+ Builder.CreateAlloca(Int32Ty, nullptr, Arg1->getName() + ".addr"); -+ Value *ReduceListArgAlloca = -+ Builder.CreateAlloca(PtrTy, nullptr, Arg2->getName() + ".addr"); -+ Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ BufferArgAlloca, PtrTy, BufferArgAlloca->getName() + ".acast"); -+ Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ IdxArgAlloca, PtrTy, IdxArgAlloca->getName() + ".acast"); -+ Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ ReduceListArgAlloca, PtrTy, ReduceListArgAlloca->getName() + ".acast"); -+ // FIXME(JAN): Assume a single globalized variable for now, this should be -+ // passed in -+ Type *SingleReductionTy = ReductionInfos.begin()->ElementType; -+ Type *TypeArgs[] = {SingleReductionTy}; -+ StructType *ReductionsBufferTy = -+ StructType::create(Ctx, TypeArgs, "_globalized_locals_ty"); -+ -+ Builder.CreateStore(Arg0, BufferArgAddrCast); -+ Builder.CreateStore(Arg1, IdxArgAddrCast); -+ Builder.CreateStore(Arg2, ReduceListArgAddrCast); -+ -+ Value *BufferArg = Builder.CreateLoad(PtrTy, BufferArgAddrCast, "buffer"); -+ Value *Idxs[] = { -+ Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast, "idxs")}; -+ Value *ReduceListArg = -+ Builder.CreateLoad(PtrTy, ReduceListArgAddrCast, "reduce_list"); -+ // FIXME(Jan): Assume TEK_SCALAR -+ for (auto En : enumerate(ReductionInfos)) { -+ const OpenMPIRBuilder::ReductionInfo &RI = En.value(); -+ // FIXME(Jan): Compute array type -+ auto *RedListArrayTy = ArrayType::get(PtrTy, 1); -+ Value *TargetElementPtrPtr = Builder.CreateConstInBoundsGEP2_64( -+ RedListArrayTy, ReduceListArg, 0, En.index()); -+ Value *TargetElementPtr = Builder.CreateLoad(PtrTy, TargetElementPtrPtr); -+ -+ Value *BufferVD = -+ Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArg, Idxs); -+ Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32( -+ ReductionsBufferTy, BufferVD, 0, En.index()); -+ Value *TargetElement = Builder.CreateLoad(RI.ElementType, TargetElementPtr); -+ Builder.CreateStore(TargetElement, GlobValPtr); -+ } -+ -+ Builder.CreateRetVoid(); -+ Builder.restoreIP(OldIP); -+ return LtGCFunc; -+} -+ -+/// This function emits a helper 
that copies all the reduction variables from -+/// the team into the provided global buffer for the reduction variables. -+/// -+/// void list_to_global_copy_func(void *buffer, int Idx, void *reduce_data) -+/// For all data entries D in reduce_data: -+/// Copy local D to buffer.D[Idx] -+static Function *emitGlobalToListCopyFunction( -+ Module &M, const OpenMPIRBuilder::LocationDescription &Loc, -+ ArrayRef ReductionInfos, -+ OpenMPIRBuilder &OMPBuilder) { -+ IRBuilder<> &Builder = OMPBuilder.Builder; -+ OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP(); -+ LLVMContext &Ctx = M.getContext(); -+ Type *VoidTy = Type::getVoidTy(Ctx); -+ Type *Int32Ty = Builder.getInt32Ty(); -+ Type *PtrTy = PointerType::getUnqual(Ctx); -+ auto FuncTy = -+ FunctionType::get(VoidTy, {PtrTy, Int32Ty, PtrTy}, /* IsVarArg */ false); -+ Function *LtGCFunc = -+ Function::Create(FuncTy, GlobalVariable::InternalLinkage, -+ "_omp_reduction_global_to_list_copy_func", &M); -+ LtGCFunc->setDoesNotRecurse(); -+ -+ BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "", LtGCFunc); -+ Builder.SetInsertPoint(EntryBlock); -+ -+ // Set arg names -+ Argument *Arg0 = LtGCFunc->getArg(0); -+ Argument *Arg1 = LtGCFunc->getArg(1); -+ Argument *Arg2 = LtGCFunc->getArg(2); -+ Arg0->setName("buffer_arg"); -+ Arg1->setName("idx_arg"); -+ Arg2->setName("reduce_list_arg"); -+ -+ Value *BufferArgAlloca = -+ Builder.CreateAlloca(PtrTy, nullptr, Arg0->getName() + ".addr"); -+ Value *IdxArgAlloca = -+ Builder.CreateAlloca(Int32Ty, nullptr, Arg1->getName() + ".addr"); -+ Value *ReduceListArgAlloca = -+ Builder.CreateAlloca(PtrTy, nullptr, Arg2->getName() + ".addr"); -+ Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ BufferArgAlloca, PtrTy, BufferArgAlloca->getName() + ".acast"); -+ Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ IdxArgAlloca, PtrTy, IdxArgAlloca->getName() + ".acast"); -+ Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ ReduceListArgAlloca, PtrTy, ReduceListArgAlloca->getName() + ".acast"); -+ // FIXME(JAN): Assume a single globalized variable for now, this should be -+ // passed in -+ Type *SingleReductionTy = ReductionInfos.begin()->ElementType; -+ Type *TypeArgs[] = {SingleReductionTy}; -+ StructType *ReductionsBufferTy = -+ StructType::create(Ctx, TypeArgs, "_globalized_locals_ty"); -+ -+ Builder.CreateStore(Arg0, BufferArgAddrCast); -+ Builder.CreateStore(Arg1, IdxArgAddrCast); -+ Builder.CreateStore(Arg2, ReduceListArgAddrCast); -+ -+ Value *BufferArg = Builder.CreateLoad(PtrTy, BufferArgAddrCast, "buffer"); -+ Value *Idxs[] = { -+ Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast, "idxs")}; -+ Value *ReduceListArg = -+ Builder.CreateLoad(PtrTy, ReduceListArgAddrCast, "reduce_list"); -+ // FIXME(Jan): Assume TEK_SCALAR -+ for (auto En : enumerate(ReductionInfos)) { -+ const OpenMPIRBuilder::ReductionInfo &RI = En.value(); -+ // FIXME(Jan): Compute array type -+ auto *RedListArrayTy = ArrayType::get(PtrTy, 1); -+ Value *TargetElementPtrPtr = Builder.CreateConstInBoundsGEP2_64( -+ RedListArrayTy, ReduceListArg, 0, En.index()); -+ Value *TargetElementPtr = Builder.CreateLoad(PtrTy, TargetElementPtrPtr); -+ -+ Value *BufferVD = -+ Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArg, Idxs); -+ Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32( -+ ReductionsBufferTy, BufferVD, 0, En.index()); -+ Value *TargetElement = Builder.CreateLoad(RI.ElementType, GlobValPtr); -+ Builder.CreateStore(TargetElement, TargetElementPtr); -+ } -+ 
-+ Builder.CreateRetVoid(); -+ Builder.restoreIP(OldIP); -+ return LtGCFunc; -+} -+ -+/// This function emits a helper that reduces all the reduction variables from -+/// the team into the provided global buffer for the reduction variables. -+/// -+/// void list_to_global_reduce_func(void *buffer, int Idx, void *reduce_data) -+/// void *GlobPtrs[]; -+/// GlobPtrs[0] = (void*)&buffer.D0[Idx]; -+/// ... -+/// GlobPtrs[N] = (void*)&buffer.DN[Idx]; -+/// reduce_function(GlobPtrs, reduce_data); -+/// Create a function with a unique name and a "void (i8*, i8*)" signature in -+/// the given module and return it. -+static Function *emitListToGlobalReduceFunction( -+ Module &M, const OpenMPIRBuilder::LocationDescription &Loc, -+ ArrayRef ReductionInfos, Function *ReduceFn, -+ OpenMPIRBuilder &OMPBuilder) { -+ IRBuilder<> &Builder = OMPBuilder.Builder; -+ OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP(); -+ LLVMContext &Ctx = M.getContext(); -+ Type *VoidTy = Type::getVoidTy(Ctx); -+ Type *Int32Ty = Builder.getInt32Ty(); -+ Type *PtrTy = PointerType::getUnqual(Ctx); -+ auto FuncTy = -+ FunctionType::get(VoidTy, {PtrTy, Int32Ty, PtrTy}, /* IsVarArg */ false); -+ Function *LtGRFunc = -+ Function::Create(FuncTy, GlobalVariable::InternalLinkage, -+ "_omp_reduction_list_to_global_reduce_func", &M); -+ LtGRFunc->setDoesNotRecurse(); -+ -+ BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "", LtGRFunc); -+ Builder.SetInsertPoint(EntryBlock); -+ -+ // Set arg names -+ Argument *Arg0 = LtGRFunc->getArg(0); -+ Argument *Arg1 = LtGRFunc->getArg(1); -+ Argument *Arg2 = LtGRFunc->getArg(2); -+ Arg0->setName("buffer_arg"); -+ Arg1->setName("idx_arg"); -+ Arg2->setName("reduce_list_arg"); -+ -+ Value *BufferArgAlloca = -+ Builder.CreateAlloca(PtrTy, nullptr, Arg0->getName() + ".addr"); -+ Value *IdxArgAlloca = -+ Builder.CreateAlloca(Int32Ty, nullptr, Arg1->getName() + ".addr"); -+ Value *ReduceListArgAlloca = -+ Builder.CreateAlloca(PtrTy, nullptr, Arg2->getName() + ".addr"); -+ // FIXME(Jan): Compute array type -+ auto *RedListArrayTy = ArrayType::get(PtrTy, 1); -+ Value *LocalReduceList = -+ Builder.CreateAlloca(RedListArrayTy, nullptr, ".omp.reduction.red_list"); -+ -+ Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ BufferArgAlloca, PtrTy, BufferArgAlloca->getName() + ".acast"); -+ Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ IdxArgAlloca, PtrTy, IdxArgAlloca->getName() + ".acast"); -+ Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ ReduceListArgAlloca, PtrTy, ReduceListArgAlloca->getName() + ".acast"); -+ Value *LocalReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ LocalReduceList, PtrTy, LocalReduceList->getName() + ".acast"); -+ // FIXME(JAN): Assume a single globalized variable for now, this should be -+ // passed in -+ Type *SingleReductionTy = ReductionInfos.begin()->ElementType; -+ Type *TypeArgs[] = {SingleReductionTy}; -+ StructType *ReductionsBufferTy = -+ StructType::create(Ctx, TypeArgs, "_globalized_locals_ty"); -+ -+ Builder.CreateStore(Arg0, BufferArgAddrCast); -+ Builder.CreateStore(Arg1, IdxArgAddrCast); -+ Builder.CreateStore(Arg2, ReduceListArgAddrCast); -+ -+ Value *BufferArg = Builder.CreateLoad(PtrTy, BufferArgAddrCast, "buffer"); -+ Value *Idxs[] = {Builder.CreateLoad(Int32Ty, IdxArgAddrCast, "idxs")}; -+ // FIXME(Jan): Assume TEK_SCALAR -+ for (auto En : enumerate(ReductionInfos)) { -+ const OpenMPIRBuilder::ReductionInfo &RI = En.value(); -+ Value *TargetElementPtrPtr = 
Builder.CreateConstInBoundsGEP2_64( -+ RedListArrayTy, LocalReduceListAddrCast, 0, En.index()); -+ Value *BufferVD = -+ Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArg, Idxs); -+ Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32( -+ ReductionsBufferTy, BufferVD, 0, En.index()); -+ Builder.CreateStore(GlobValPtr, TargetElementPtrPtr); +@@ -3466,9 +3470,9 @@ + OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU( + const LocationDescription &Loc, InsertPointTy AllocaIP, + InsertPointTy CodeGenIP, ArrayRef<ReductionInfo> ReductionInfos, +- bool IsNoWait, bool IsTeamsReduction, bool HasDistribute, +- ReductionGenCBKind ReductionGenCBKind, std::optional<omp::GV> GridValue, +- unsigned ReductionBufNum, Value *SrcLocInfo) { ++ bool IsNoWait, bool IsTeamsReduction, ReductionGenCBKind ReductionGenCBKind, ++ std::optional<omp::GV> GridValue, unsigned ReductionBufNum, ++ Value *SrcLocInfo) { + if (!updateToLocation(Loc)) + return InsertPointTy(); + Builder.restoreIP(CodeGenIP); +@@ -3485,6 +3489,16 @@ + if (ReductionInfos.size() == 0) + return Builder.saveIP(); + ++ BasicBlock *ContinuationBlock = nullptr; ++ if (ReductionGenCBKind != ReductionGenCBKind::Clang) { ++ // Copied code from createReductions ++ BasicBlock *InsertBlock = Loc.IP.getBlock(); ++ ContinuationBlock = ++ InsertBlock->splitBasicBlock(Loc.IP.getPoint(), "reduce.finalize"); ++ InsertBlock->getTerminator()->eraseFromParent(); ++ Builder.SetInsertPoint(InsertBlock, InsertBlock->end()); + } + -+ Value *ReduceList = Builder.CreateLoad(PtrTy, ReduceListArgAddrCast); -+ Builder.CreateCall(ReduceFn, {LocalReduceListAddrCast, ReduceList}); -+ Builder.CreateRetVoid(); -+ Builder.restoreIP(OldIP); -+ return LtGRFunc; -+} -+ -+/// This function emits a helper that reduces all the reduction variables from -+/// the team into the provided global buffer for the reduction variables. -+/// -+/// void list_to_global_reduce_func(void *buffer, int Idx, void *reduce_data) -+/// void *GlobPtrs[]; -+/// GlobPtrs[0] = (void*)&buffer.D0[Idx]; -+/// ... -+/// GlobPtrs[N] = (void*)&buffer.DN[Idx]; -+/// reduce_function(GlobPtrs, reduce_data); - /// Create a function with a unique name and a "void (i8*, i8*)" signature in - /// the given module and return it.
--Function *getFreshReductionFunc(Module &M) { -+static Function *emitGlobalToListReduceFunction( -+ Module &M, const OpenMPIRBuilder::LocationDescription &Loc, -+ ArrayRef ReductionInfos, Function *ReduceFn, -+ OpenMPIRBuilder &OMPBuilder) { -+ IRBuilder<> &Builder = OMPBuilder.Builder; -+ OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP(); -+ LLVMContext &Ctx = M.getContext(); -+ Type *VoidTy = Type::getVoidTy(Ctx); -+ Type *Int32Ty = Builder.getInt32Ty(); -+ Type *PtrTy = PointerType::getUnqual(Ctx); -+ auto FuncTy = -+ FunctionType::get(VoidTy, {PtrTy, Int32Ty, PtrTy}, /* IsVarArg */ false); -+ Function *LtGRFunc = -+ Function::Create(FuncTy, GlobalVariable::InternalLinkage, -+ "_omp_reduction_global_to_list_reduce_func", &M); -+ LtGRFunc->setDoesNotRecurse(); -+ -+ BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "", LtGRFunc); -+ Builder.SetInsertPoint(EntryBlock); -+ -+ // Set arg names -+ Argument *Arg0 = LtGRFunc->getArg(0); -+ Argument *Arg1 = LtGRFunc->getArg(1); -+ Argument *Arg2 = LtGRFunc->getArg(2); -+ Arg0->setName("buffer_arg"); -+ Arg1->setName("idx_arg"); -+ Arg2->setName("reduce_list_arg"); -+ -+ Value *BufferArgAlloca = -+ Builder.CreateAlloca(PtrTy, nullptr, Arg0->getName() + ".addr"); -+ Value *IdxArgAlloca = -+ Builder.CreateAlloca(Int32Ty, nullptr, Arg1->getName() + ".addr"); -+ Value *ReduceListArgAlloca = -+ Builder.CreateAlloca(PtrTy, nullptr, Arg2->getName() + ".addr"); -+ // FIXME(Jan): Compute array type -+ auto *RedListArrayTy = ArrayType::get(PtrTy, 1); -+ Value *LocalReduceList = -+ Builder.CreateAlloca(RedListArrayTy, nullptr, ".omp.reduction.red_list"); -+ -+ Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ BufferArgAlloca, PtrTy, BufferArgAlloca->getName() + ".acast"); -+ Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ IdxArgAlloca, PtrTy, IdxArgAlloca->getName() + ".acast"); -+ Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ ReduceListArgAlloca, PtrTy, ReduceListArgAlloca->getName() + ".acast"); -+ Value *LocalReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast( -+ LocalReduceList, PtrTy, LocalReduceList->getName() + ".acast"); -+ // FIXME(JAN): Assume a single globalized variable for now, this should be -+ // passed in -+ Type *SingleReductionTy = ReductionInfos.begin()->ElementType; -+ Type *TypeArgs[] = {SingleReductionTy}; -+ StructType *ReductionsBufferTy = -+ StructType::create(Ctx, TypeArgs, "_globalized_locals_ty"); -+ -+ Builder.CreateStore(Arg0, BufferArgAddrCast); -+ Builder.CreateStore(Arg1, IdxArgAddrCast); -+ Builder.CreateStore(Arg2, ReduceListArgAddrCast); -+ -+ Value *BufferArg = Builder.CreateLoad(PtrTy, BufferArgAddrCast, "buffer"); -+ Value *Idxs[] = {Builder.CreateLoad(Int32Ty, IdxArgAddrCast, "idxs")}; -+ // FIXME(Jan): Assume TEK_SCALAR -+ for (auto En : enumerate(ReductionInfos)) { -+ const OpenMPIRBuilder::ReductionInfo &RI = En.value(); -+ Value *TargetElementPtrPtr = Builder.CreateConstInBoundsGEP2_64( -+ RedListArrayTy, LocalReduceListAddrCast, 0, En.index()); -+ Value *BufferVD = -+ Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArg, Idxs); -+ Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32( -+ ReductionsBufferTy, BufferVD, 0, En.index()); -+ Builder.CreateStore(GlobValPtr, TargetElementPtrPtr); + Function *CurFunc = Builder.GetInsertBlock()->getParent(); + AttributeList FuncAttrs; + AttrBuilder AttrBldr(Ctx); +@@ -3640,11 +3654,21 @@ + ReductionFunc; + }); + } else { +- assert(false && "Unhandled 
ReductionGenCBKind"); ++ Value *LHSValue = Builder.CreateLoad(RI.ElementType, LHS, "final.lhs"); ++ Value *RHSValue = Builder.CreateLoad(RI.ElementType, RHS, "final.rhs"); ++ Value *Reduced; ++ InsertPointOrErrorTy AfterIP = ++ RI.ReductionGen(Builder.saveIP(), RHSValue, LHSValue, Reduced); ++ if (!AfterIP) ++ return AfterIP.takeError(); ++ Builder.CreateStore(Reduced, LHS, false); + } + } + emitBlock(ExitBB, CurFunc); +- ++ if (ContinuationBlock) { ++ Builder.CreateBr(ContinuationBlock); ++ Builder.SetInsertPoint(ContinuationBlock); + } -+ -+ Value *ReduceList = Builder.CreateLoad(PtrTy, ReduceListArgAddrCast); -+ Builder.CreateCall(ReduceFn, {ReduceList, LocalReduceListAddrCast}); -+ Builder.CreateRetVoid(); -+ Builder.restoreIP(OldIP); -+ return LtGRFunc; -+} -+ -+static Function *getFreshReductionFunc(Module &M) { - Type *VoidTy = Type::getVoidTy(M.getContext()); - Type *Int8PtrTy = PointerType::getUnqual(M.getContext()); - auto *FuncTy = - FunctionType::get(VoidTy, {Int8PtrTy, Int8PtrTy}, /* IsVarArg */ false); - return Function::Create(FuncTy, GlobalVariable::InternalLinkage, -- M.getDataLayout().getDefaultGlobalsAddressSpace(), + Config.setEmitLLVMUsed(); + + return Builder.saveIP(); +@@ -3659,27 +3683,95 @@ ".omp.reduction.func", &M); } --OpenMPIRBuilder::InsertPointTy +-OpenMPIRBuilder::InsertPointOrErrorTy -OpenMPIRBuilder::createReductions(const LocationDescription &Loc, - InsertPointTy AllocaIP, - ArrayRef ReductionInfos, - ArrayRef IsByRef, bool IsNoWait) { - assert(ReductionInfos.size() == IsByRef.size()); - for (const ReductionInfo &RI : ReductionInfos) { -+static void populateReductionFunction( +- (void)RI; +- assert(RI.Variable && "expected non-null variable"); +- assert(RI.PrivateVariable && "expected non-null private variable"); +- assert(RI.ReductionGen && "expected non-null reduction generator callback"); +- assert(RI.Variable->getType() == RI.PrivateVariable->getType() && +- "expected variables and their private equivalents to have the same " +- "type"); +- assert(RI.Variable->getType()->isPointerTy() && +- "expected variables to be pointers"); ++static Error populateReductionFunction( + Function *ReductionFunc, + ArrayRef ReductionInfos, + IRBuilder<> &Builder, ArrayRef IsByRef, bool IsGPU) { @@ -11437,8 +11007,8 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu + } else { + LHSArrayPtr = ReductionFunc->getArg(0); + RHSArrayPtr = ReductionFunc->getArg(1); -+ } -+ + } + + unsigned NumReductions = ReductionInfos.size(); + Type *RedArrayTy = ArrayType::get(Builder.getPtrTy(), NumReductions); + @@ -11457,191 +11027,35 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu + RHSI8Ptr, RI.PrivateVariable->getType()); + Value *RHS = Builder.CreateLoad(RI.ElementType, RHSPtr); + Value *Reduced; -+ Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced)); ++ OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = ++ RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced); ++ if (!AfterIP) ++ return AfterIP.takeError(); ++ ++ Builder.restoreIP(*AfterIP); ++ // TODO: Consider flagging an error. 
+ if (!Builder.GetInsertBlock()) -+ return; ++ return Error::success(); ++ + // store is inside of the reduction region when using by-ref + if (!IsByRef[En.index()]) + Builder.CreateStore(Reduced, LHSPtr); + } + Builder.CreateRetVoid(); ++ return Error::success(); +} + -+static void -+checkReductionInfos(ArrayRef ReductionInfos, -+ bool IsGPU) { -+ for (const OpenMPIRBuilder::ReductionInfo &RI : ReductionInfos) { - (void)RI; - assert(RI.Variable && "expected non-null variable"); - assert(RI.PrivateVariable && "expected non-null private variable"); - assert(RI.ReductionGen && "expected non-null reduction generator callback"); -- assert(RI.Variable->getType() == RI.PrivateVariable->getType() && -- "expected variables and their private equivalents to have the same " -- "type"); -+ // JAN: Skip this assertion for GPU, address spaces are present -+ if (!IsGPU) { -+ assert( -+ RI.Variable->getType() == RI.PrivateVariable->getType() && -+ "expected variables and their private equivalents to have the same " -+ "type"); -+ } - assert(RI.Variable->getType()->isPointerTy() && - "expected variables to be pointers"); - } -+} -+ -+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductionsGPU( -+ const LocationDescription &Loc, InsertPointTy AllocaIP, -+ ArrayRef ReductionInfos, ArrayRef IsByRef, -+ bool IsNoWait, bool IsTeamsReduction, bool HasDistribute) { -+ checkReductionInfos(ReductionInfos, /*IsGPU*/ true); -+ LLVMContext &Ctx = M.getContext(); -+ if (!updateToLocation(Loc)) -+ return InsertPointTy(); -+ -+ if (ReductionInfos.size() == 0) -+ return Builder.saveIP(); -+ -+ assert(ReductionInfos.size() == 1 && "More than one reduction variable"); -+ -+ // Copied code from createReductions -+ BasicBlock *InsertBlock = Loc.IP.getBlock(); -+ BasicBlock *ContinuationBlock = -+ InsertBlock->splitBasicBlock(Loc.IP.getPoint(), "reduce.finalize"); -+ InsertBlock->getTerminator()->eraseFromParent(); -+ Builder.SetInsertPoint(InsertBlock, InsertBlock->end()); -+ -+ Function *ReductionFunc = nullptr; -+ if (GLOBAL_ReductionFunc) { -+ ReductionFunc = GLOBAL_ReductionFunc; -+ } else { -+ ReductionFunc = getFreshReductionFunc(M); -+ GLOBAL_ReductionFunc = ReductionFunc; -+ InsertPointTy CurIP = Builder.saveIP(); -+ populateReductionFunction(ReductionFunc, ReductionInfos, Builder, IsByRef, -+ true); -+ Builder.restoreIP(CurIP); -+ } -+ -+ uint32_t SrcLocStrSize; -+ Constant *SrcLocStr = getOrCreateDefaultSrcLocStr(SrcLocStrSize); -+ Value *RTLoc = -+ getOrCreateIdent(SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(0), 0); -+ -+ // 1. 
Build a list of reduction variables -+ auto Size = ReductionInfos.size(); -+ // FIXME(JAN): skipping variably modified type storage for array size -+ Type *PtrTy = PointerType::getUnqual(Ctx); -+ Type *RedArrayTy = ArrayType::get(PtrTy, Size); -+ InsertPointTy CurIP = Builder.saveIP(); -+ Builder.restoreIP(AllocaIP); -+ Value *ReductionListAlloca = -+ Builder.CreateAlloca(RedArrayTy, nullptr, ".omp.reduction.red_list"); -+ Value *ReductionList = -+ Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionListAlloca, PtrTy); -+ Builder.restoreIP(CurIP); -+ for (auto En : enumerate(ReductionInfos)) { -+ const ReductionInfo &RI = En.value(); -+ Value *ElemPtr = Builder.CreateConstGEP2_64(RedArrayTy, ReductionList, 0, -+ En.index(), "elem_ptr"); -+ Value *CastElem = -+ Builder.CreatePointerBitCastOrAddrSpaceCast(RI.PrivateVariable, PtrTy); -+ Builder.CreateStore(CastElem, ElemPtr); -+ } -+ CurIP = Builder.saveIP(); -+ Function *SarFunc = emitShuffleAndReduceFunction(M, Loc, ReductionInfos, -+ ReductionFunc, *this); -+ Function *WcFunc = emitInterWarpCopyFunction(M, Loc, ReductionInfos, *this); -+ Builder.restoreIP(CurIP); -+ -+ Value *RL = Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, PtrTy); -+ Value *ReductionDataSize = -+ getTypeSizeInBytesValue(Builder, M, ReductionInfos.begin()->ElementType); -+ -+ Value *Res; -+ if (!IsTeamsReduction) { -+ Value *SarFuncCast = -+ Builder.CreatePointerBitCastOrAddrSpaceCast(SarFunc, PtrTy); -+ Value *WcFuncCast = -+ Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, PtrTy); -+ Value *Args[] = {RTLoc, ReductionDataSize, RL, SarFuncCast, WcFuncCast}; -+ Function *Pv2Ptr = getOrCreateRuntimeFunctionPtr( -+ RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2); -+ Res = Builder.CreateCall(Pv2Ptr, Args); -+ } else { -+ CurIP = Builder.saveIP(); -+ Function *LtGCFunc = -+ emitListToGlobalCopyFunction(M, Loc, ReductionInfos, *this); -+ Function *LtGRFunc = emitListToGlobalReduceFunction(M, Loc, ReductionInfos, -+ ReductionFunc, *this); -+ Function *GtLCFunc = -+ emitGlobalToListCopyFunction(M, Loc, ReductionInfos, *this); -+ Function *GtLRFunc = emitGlobalToListReduceFunction(M, Loc, ReductionInfos, -+ ReductionFunc, *this); -+ Builder.restoreIP(CurIP); -+ -+ Function *RedFixedBuferFn = getOrCreateRuntimeFunctionPtr( -+ RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer); -+ -+ Value *KernelTeamsReductionPtr = Builder.CreateCall(RedFixedBuferFn, {}); -+ -+ Value *Args3[] = {RTLoc, -+ KernelTeamsReductionPtr, -+ Builder.getInt32(1024), -+ ReductionDataSize, -+ RL, -+ SarFunc, -+ WcFunc, -+ LtGCFunc, -+ LtGRFunc, -+ GtLCFunc, -+ GtLRFunc}; -+ -+ Function *TeamsReduceFn = getOrCreateRuntimeFunctionPtr( -+ RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2); -+ Res = Builder.CreateCall(TeamsReduceFn, Args3); -+ } -+ -+ if (IsTeamsReduction || !HasDistribute) { -+ Function *CurFunc = Builder.GetInsertBlock()->getParent(); -+ BasicBlock *ExitBB = -+ BasicBlock::Create(Ctx, ".omp.reduction.done", CurFunc); -+ BasicBlock *ThenBB = -+ BasicBlock::Create(Ctx, ".omp.reduction.then", CurFunc); -+ Value *Cond = Builder.CreateICmpEQ(Res, Builder.getInt32(1)); -+ Builder.CreateCondBr(Cond, ThenBB, ExitBB); -+ -+ Builder.SetInsertPoint(ThenBB); -+ for (auto En : enumerate(ReductionInfos)) { -+ const ReductionInfo &RI = En.value(); -+ Value *InputVal = Builder.CreateLoad(RI.ElementType, RI.Variable); -+ Value *RedVal = Builder.CreateLoad( -+ RI.ElementType, Builder.CreatePointerBitCastOrAddrSpaceCast( -+ RI.PrivateVariable, PtrTy)); -+ Value 
*sum; -+ Builder.restoreIP( -+ RI.ReductionGen(Builder.saveIP(), InputVal, RedVal, sum)); -+ Builder.CreateStore(sum, RI.Variable); -+ Builder.CreateBr(ExitBB); -+ } -+ Builder.SetInsertPoint(ExitBB); -+ } -+ Builder.CreateBr(ContinuationBlock); -+ Builder.SetInsertPoint(ContinuationBlock); -+ return Builder.saveIP(); -+} -+ -+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions( + const LocationDescription &Loc, InsertPointTy AllocaIP, + ArrayRef<ReductionInfo> ReductionInfos, ArrayRef<bool> IsByRef, + bool IsNoWait, bool IsTeamsReduction) { + assert(ReductionInfos.size() == IsByRef.size()); + if (Config.isGPU()) + return createReductionsGPU(Loc, AllocaIP, Builder.saveIP(), ReductionInfos, + IsNoWait, IsTeamsReduction); + + checkReductionInfos(ReductionInfos, /*IsGPU*/ false); - + if (!updateToLocation(Loc)) return InsertPointTy(); @@ -3743,9 +3835,9 @@ Switch->addCase(Builder.getInt32(1), NonAtomicRedBlock); Switch->addCase(Builder.getInt32(2), AtomicRedBlock); @@ -3801,38 +3893,13 @@ // Populate the outlined reduction function using the elementwise reduction // function. Partial values are extracted from the type-erased array of // pointers to private variables.
@@ -11673,7 +11087,11 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu - Builder.SetInsertPoint(ReductionFuncBlock); - Value *LHSArrayPtr = ReductionFunc->getArg(0); - Value *RHSArrayPtr = ReductionFunc->getArg(1); -- ++ Error Err = populateReductionFunction(ReductionFunc, ReductionInfos, Builder, ++ IsByRef, false); ++ if (Err) ++ return Err; + - for (auto En : enumerate(ReductionInfos)) { - const ReductionInfo &RI = En.value(); - Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64( @@ -11688,7 +11106,11 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu - Builder.CreateBitCast(RHSI8Ptr, RI.PrivateVariable->getType()); - Value *RHS = Builder.CreateLoad(RI.ElementType, RHSPtr); - Value *Reduced; -- Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced)); +- InsertPointOrErrorTy AfterIP = +- RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced); +- if (!AfterIP) +- return AfterIP.takeError(); +- Builder.restoreIP(*AfterIP); - if (!Builder.GetInsertBlock()) - return InsertPointTy(); - // store is inside of the reduction region when using by-ref @@ -11696,22 +11118,61 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu - Builder.CreateStore(Reduced, LHSPtr); - } - Builder.CreateRetVoid(); -- -+ populateReductionFunction(ReductionFunc, ReductionInfos, Builder, IsByRef, -+ false); ++ if (!Builder.GetInsertBlock()) ++ return InsertPointTy(); + Builder.SetInsertPoint(ContinuationBlock); return Builder.saveIP(); - } -@@ -2434,7 +3574,7 @@ - CL->assertOK(); - #endif +@@ -3984,11 +4051,9 @@ return CL; --} -+ } + } + +-Expected OpenMPIRBuilder::createCanonicalLoop( +- const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, +- Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, +- InsertPointTy ComputeIP, const Twine &Name) { +- ++Value *OpenMPIRBuilder::calculateCanonicalLoopTripCount( ++ const LocationDescription &Loc, Value *Start, Value *Stop, Value *Step, ++ bool IsSigned, bool InclusiveStop, const Twine &Name) { + // Consider the following difficulties (assuming 8-bit signed integers): + // * Adding \p Step to the loop counter which passes \p Stop may overflow: + // DO I = 1, 100, 50 +@@ -4000,9 +4065,7 @@ + assert(IndVarTy == Stop->getType() && "Stop type mismatch"); + assert(IndVarTy == Step->getType() && "Step type mismatch"); + +- LocationDescription ComputeLoc = +- ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc; +- updateToLocation(ComputeLoc); ++ updateToLocation(Loc); + + ConstantInt *Zero = ConstantInt::get(IndVarTy, 0); + ConstantInt *One = ConstantInt::get(IndVarTy, 1); +@@ -4042,8 +4105,20 @@ + Value *OneCmp = Builder.CreateICmp(CmpInst::ICMP_ULE, Span, Incr); + CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo); + } +- Value *TripCount = Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping, +- "omp_" + Name + ".tripcount"); ++ ++ return Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping, ++ "omp_" + Name + ".tripcount"); ++} ++ ++Expected OpenMPIRBuilder::createCanonicalLoop( ++ const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, ++ Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, ++ InsertPointTy ComputeIP, const Twine &Name) { ++ LocationDescription ComputeLoc = ++ ComputeIP.isSet() ? 
LocationDescription(ComputeIP, Loc.DL) : Loc; ++ ++ Value *TripCount = calculateCanonicalLoopTripCount( ++ ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name); - CanonicalLoopInfo * - OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc, -@@ -2608,7 +3748,8 @@ + auto BodyGen = [=](InsertPointTy CodeGenIP, Value *IV) { + Builder.restoreIP(CodeGenIP); +@@ -4127,7 +4202,8 @@ PUpperBound, PStride, One, Zero}); Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound); Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound); @@ -11721,7 +11182,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One); CLI->setTripCount(TripCount); -@@ -2753,6 +3894,7 @@ +@@ -4290,6 +4366,7 @@ IsLastChunk, CountUntilOrigTripCount, ChunkRange, "omp_chunk.tripcount"); Value *BackcastedChunkTC = Builder.CreateTrunc(ChunkTripCount, IVTy, "omp_chunk.tripcount.trunc"); @@ -11729,7 +11190,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu CLI->setTripCount(BackcastedChunkTC); // Update all uses of the induction variable except the one in the condition -@@ -2828,10 +3970,24 @@ +@@ -4369,10 +4446,24 @@ static void createTargetLoopWorkshareCall( OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, @@ -11756,7 +11217,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu FunctionCallee RTLFn = getKmpcForStaticLoopForType(TripCountTy, OMPBuilder, LoopType); SmallVector RealArgs; -@@ -2841,6 +3997,7 @@ +@@ -4382,6 +4473,7 @@ RealArgs.push_back(TripCount); if (LoopType == WorksharingLoopType::DistributeStaticLoop) { RealArgs.push_back(ConstantInt::get(TripCountTy, 0)); @@ -11764,7 +11225,45 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu Builder.CreateCall(RTLFn, RealArgs); return; } -@@ -4574,10 +5731,9 @@ +@@ -5307,8 +5399,8 @@ + for (auto &AlignedItem : AlignedVars) { + Value *AlignedPtr = AlignedItem.first; + Value *Alignment = AlignedItem.second; +- Builder.CreateAlignmentAssumption(F->getDataLayout(), +- AlignedPtr, Alignment); ++ Builder.CreateAlignmentAssumption(F->getDataLayout(), AlignedPtr, ++ Alignment); + } + Builder.restoreIP(IP); + } +@@ -5456,16 +5548,16 @@ + Loop *L = LI.getLoopFor(CLI->getHeader()); + assert(L && "Expecting CanonicalLoopInfo to be recognized as a loop"); + +- TargetTransformInfo::UnrollingPreferences UP = +- gatherUnrollingPreferences(L, SE, TTI, +- /*BlockFrequencyInfo=*/nullptr, +- /*ProfileSummaryInfo=*/nullptr, ORE, static_cast(OptLevel), +- /*UserThreshold=*/std::nullopt, +- /*UserCount=*/std::nullopt, +- /*UserAllowPartial=*/true, +- /*UserAllowRuntime=*/true, +- /*UserUpperBound=*/std::nullopt, +- /*UserFullUnrollMaxCount=*/std::nullopt); ++ TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( ++ L, SE, TTI, ++ /*BlockFrequencyInfo=*/nullptr, ++ /*ProfileSummaryInfo=*/nullptr, ORE, static_cast(OptLevel), ++ /*UserThreshold=*/std::nullopt, ++ /*UserCount=*/std::nullopt, ++ /*UserAllowPartial=*/true, ++ /*UserAllowRuntime=*/true, ++ /*UserUpperBound=*/std::nullopt, ++ /*UserFullUnrollMaxCount=*/std::nullopt); + + UP.Force = true; + +@@ -6128,10 +6220,11 @@ return Builder.CreateCall(Fn, Args); } @@ -11775,12 +11274,14 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetInit( + const 
LocationDescription &Loc, bool IsSPMD, + const llvm::OpenMPIRBuilder::TargetKernelDefaultBounds &Bounds) { ++ assert(!Bounds.MaxThreads.empty() && !Bounds.MaxTeams.empty() && ++ "expected num_threads and num_teams to be specified"); if (!updateToLocation(Loc)) return Loc.IP; -@@ -4592,28 +5748,32 @@ - - Function *Kernel = Builder.GetInsertBlock()->getParent(); +@@ -6156,28 +6249,32 @@ + assert(Kernel && "Expected the real kernel to exist"); + } + // Set the grid value in the config needed for lowering later on + Config.setGridValue(getGridValue(T, Kernel)); @@ -11789,7 +11290,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu // environment. - if (MinTeamsVal > 1 || MaxTeamsVal > 0) - writeTeamsForKernel(T, *Kernel, MinTeamsVal, MaxTeamsVal); -- ++ if (Bounds.MinTeams > 1 || Bounds.MaxTeams.front() > 0) ++ writeTeamsForKernel(T, *Kernel, Bounds.MinTeams, Bounds.MaxTeams.front()); + -#if FIX_NUM_THREADS_ISSUE - //breaks 534.hpgmg - // For max values, < 0 means unset, == 0 means set but unknown. @@ -11800,9 +11303,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu - - if (MaxThreadsVal > 0) - writeThreadBoundsForKernel(T, *Kernel, MinThreadsVal, MaxThreadsVal); -+ if (Bounds.MinTeams > 1 || Bounds.MaxTeams > 0) -+ writeTeamsForKernel(T, *Kernel, Bounds.MinTeams, Bounds.MaxTeams); - +- - Constant *MinThreads = ConstantInt::getSigned(Int32, MinThreadsVal); - Constant *MaxThreads = ConstantInt::getSigned(Int32, MaxThreadsVal); - Constant *MinTeams = ConstantInt::getSigned(Int32, MinTeamsVal); @@ -11811,7 +11312,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu - Constant *ReductionBufferLength = ConstantInt::getSigned(Int32, 0); + // If MaxThreads not set, select the maximum between the default workgroup + // size and the MinThreads value. -+ int32_t MaxThreadsValue = Bounds.MaxThreads; ++ int32_t MaxThreadsValue = Bounds.MaxThreads.front(); + if (MaxThreadsValue < 0) + MaxThreadsValue = std::max( + int32_t(getGridValue(T, Kernel).GV_Default_WG_Size), Bounds.MinThreads); @@ -11822,27 +11323,25 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu + Constant *MinThreads = ConstantInt::getSigned(Int32, Bounds.MinThreads); + Constant *MaxThreads = ConstantInt::getSigned(Int32, MaxThreadsValue); + Constant *MinTeams = ConstantInt::getSigned(Int32, Bounds.MinTeams); -+ Constant *MaxTeams = ConstantInt::getSigned(Int32, Bounds.MaxTeams); ++ Constant *MaxTeams = ConstantInt::getSigned(Int32, Bounds.MaxTeams.front()); + Constant *ReductionDataSize = + ConstantInt::getSigned(Int32, Bounds.ReductionDataSize); + Constant *ReductionBufferLength = + ConstantInt::getSigned(Int32, Bounds.ReductionBufferLength); - // We need to strip the debug prefix to get the correct kernel name. - StringRef KernelName = Kernel->getName(); -@@ -4672,9 +5832,8 @@ - ? 
KernelEnvironmentGV - : ConstantExpr::getAddrSpaceCast(KernelEnvironmentGV, - KernelEnvironmentPtr); -- Value *KernelLaunchEnvironment = Kernel->getArg(0); - CallInst *ThreadKind = -- Builder.CreateCall(Fn, {KernelEnvironment, KernelLaunchEnvironment}); -+ Builder.CreateCall(Fn, {KernelEnvironment, Kernel->getArg(0)}); - - Value *ExecUserCode = Builder.CreateICmpEQ( - ThreadKind, ConstantInt::get(ThreadKind->getType(), -1), -@@ -5116,6 +6275,45 @@ - return getOrCreateRuntimeFunction(M, Name); + Function *Fn = getOrCreateRuntimeFunctionPtr( + omp::RuntimeFunction::OMPRTL___kmpc_target_init); +@@ -6497,7 +6594,7 @@ + if (Config.IsTargetDevice.value_or(false)) { + if (BodyGenCB) { + InsertPointOrErrorTy AfterIP = +- BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv); ++ BodyGenCB(CodeGenIP, BodyGenTy::NoPriv); + if (!AfterIP) + return AfterIP.takeError(); + Builder.restoreIP(*AfterIP); +@@ -6762,9 +6859,49 @@ + return getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_dispatch_deinit); } +static void emitUsed(StringRef Name, std::vector &List, @@ -11884,13 +11383,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu + LLVMCompilerUsed.emplace_back(GVMode); +} + - static void replaceConstatExprUsesInFuncWithInstr(ConstantExpr *ConstExpr, - Function *Func) { - for (User *User : make_early_inc_range(ConstExpr->users())) { -@@ -5138,8 +6336,9 @@ - } - - static Function *createOutlinedFunction( + static Expected createOutlinedFunction( - OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, StringRef FuncName, - SmallVectorImpl &Inputs, + OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsSPMD, @@ -11899,9 +11392,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) { SmallVector ParameterTypes; -@@ -5165,6 +6364,24 @@ - auto Func = Function::Create(FuncType, GlobalValue::InternalLinkage, FuncName, - Builder.GetInsertBlock()->getModule()); +@@ -6792,6 +6929,24 @@ + auto Func = + Function::Create(FuncType, GlobalValue::InternalLinkage, FuncName, M); + // Forward target-cpu and target-features function attributes from the + // original function to the new outlined function. @@ -11917,14 +11410,14 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu + + if (OMPBuilder.Config.isTargetDevice()) { + std::vector LLVMCompilerUsed; -+ emitExecutionMode(OMPBuilder, Builder, FuncName, false, LLVMCompilerUsed); ++ emitExecutionMode(OMPBuilder, Builder, FuncName, IsSPMD, LLVMCompilerUsed); + Type *Int8PtrTy = Type::getInt8Ty(Builder.getContext())->getPointerTo(); + emitUsed("llvm.compiler.used", LLVMCompilerUsed, Int8PtrTy, OMPBuilder.M); + } // Save insert point. - auto OldInsertPoint = Builder.saveIP(); - -@@ -5174,7 +6391,8 @@ + IRBuilder<>::InsertPointGuard IPG(Builder); + // If there's a DISubprogram associated with current function, then +@@ -6831,7 +6986,8 @@ // Insert target init call in the device compilation pass. 
if (OMPBuilder.Config.isTargetDevice()) @@ -11934,21 +11427,20 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock(); -@@ -5241,16 +6459,18 @@ +@@ -7026,15 +7182,17 @@ } - static void emitTargetOutlinedFunction( -- OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, + static Error emitTargetOutlinedFunction( +- OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, - TargetRegionEntryInfo &EntryInfo, Function *&OutlinedFn, - Constant *&OutlinedFnID, SmallVectorImpl &Inputs, + OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsSPMD, -+ TargetRegionEntryInfo &EntryInfo, ++ bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, + const OpenMPIRBuilder::TargetKernelDefaultBounds &DefaultBounds, + Function *&OutlinedFn, Constant *&OutlinedFnID, + SmallVectorImpl &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) { - OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction = - [&OMPBuilder, &Builder, &Inputs, &CBFunc, - &ArgAccessorFuncCB](StringRef EntryFnName) { @@ -11959,129 +11451,256 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu CBFunc, ArgAccessorFuncCB); }; -@@ -5258,12 +6478,14 @@ - OutlinedFn, OutlinedFnID); - } +@@ -7331,9 +7489,11 @@ --static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, -- OpenMPIRBuilder::InsertPointTy AllocaIP, -- Function *OutlinedFn, Constant *OutlinedFnID, -- int32_t NumTeams, int32_t NumThreads, -- SmallVectorImpl &Args, -- OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB) { -+static void -+emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, + static void + emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, +- OpenMPIRBuilder::InsertPointTy AllocaIP, Function *OutlinedFn, +- Constant *OutlinedFnID, ArrayRef NumTeams, +- ArrayRef NumThreads, SmallVectorImpl &Args, + OpenMPIRBuilder::InsertPointTy AllocaIP, + const OpenMPIRBuilder::TargetKernelDefaultBounds &DefaultBounds, + const OpenMPIRBuilder::TargetKernelRuntimeBounds &RuntimeBounds, + Function *OutlinedFn, Constant *OutlinedFnID, -+ SmallVectorImpl &Args, -+ OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB) { - - OpenMPIRBuilder::TargetDataInfo Info( - /*RequiresDevicePointerInfo=*/false, -@@ -5288,22 +6510,56 @@ - unsigned NumTargetItems = MapInfo.BasePointers.size(); - // TODO: Use correct device ID - Value *DeviceID = Builder.getInt64(OMP_DEVICEID_UNDEF); -- Value *NumTeamsVal = Builder.getInt32(NumTeams); -- Value *NumThreadsVal = Builder.getInt32(NumThreads); - uint32_t SrcLocStrSize; - Constant *SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); - Value *RTLoc = OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize, - llvm::omp::IdentFlag(0), 0); -- // TODO: Use correct NumIterations -- Value *NumIterations = Builder.getInt64(0); -+ -+ Value *TripCount = RuntimeBounds.LoopTripCount -+ ? Builder.CreateIntCast(RuntimeBounds.LoopTripCount, -+ Builder.getInt64Ty(), -+ /*isSigned=*/false) -+ : Builder.getInt64(0); -+ -+ Value *NumTeams = RuntimeBounds.MaxTeams -+ ? RuntimeBounds.MaxTeams -+ : Builder.getInt32(DefaultBounds.MaxTeams); -+ -+ // Calculate number of threads: 0 if no clauses specified, otherwise it is the -+ // minimum between optional THREAD_LIMIT and MAX_THREADS clauses. Perform a -+ // type cast to uint32. 
-+ auto InitMaxThreadsClause = [&Builder](Value *Clause) { -+ if (Clause) -+ Clause = Builder.CreateIntCast(Clause, Builder.getInt32Ty(), -+ /*isSigned=*/false); -+ return Clause; ++ SmallVectorImpl &Args, Value *IfCond, + OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, + SmallVector Dependencies = {}, + bool HasNoWait = false) { +@@ -7379,9 +7539,7 @@ + return Error::success(); + }; + +- // If we don't have an ID for the target region, it means an offload entry +- // wasn't created. In this case we just run the host fallback directly. +- if (!OutlinedFnID) { ++ auto &&EmitTargetCallElse = [&]() { + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = [&]() { + if (RequiresOuterTargetTask) { + // Arguments that are intended to be directly forwarded to an +@@ -7398,66 +7556,142 @@ + // produce any. The 'if' check enables accessing the returned value. + if (AfterIP) + Builder.restoreIP(*AfterIP); + }; + -+ auto CombineMaxThreadsClauses = [&Builder](Value *Clause, Value *&Result) { -+ if (Clause) -+ Result = Result -+ ? Builder.CreateSelect(Builder.CreateICmpULT(Result, Clause), ++ auto &&EmitTargetCallThen = [&]() { ++ OpenMPIRBuilder::TargetDataInfo Info( ++ /*RequiresDevicePointerInfo=*/false, ++ /*SeparateBeginEndCalls=*/true); ++ ++ OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP()); ++ OpenMPIRBuilder::TargetDataRTArgs RTArgs; ++ OMPBuilder.emitOffloadingArraysAndArgs(AllocaIP, Builder.saveIP(), Info, ++ RTArgs, MapInfo, ++ /*IsNonContiguous=*/true, ++ /*ForEndCall=*/false); ++ ++ SmallVector NumTeamsC; ++ for (auto [DefNumTeams, RtNumTeams] : ++ llvm::zip_equal(DefaultBounds.MaxTeams, RuntimeBounds.MaxTeams)) { ++ NumTeamsC.push_back(RtNumTeams ? RtNumTeams ++ : Builder.getInt32(DefNumTeams)); ++ } ++ ++ // Calculate number of threads: 0 if no clauses specified, otherwise it is ++ // the minimum between optional THREAD_LIMIT and MAX_THREADS clauses. ++ // Perform a type cast to uint32. ++ auto InitMaxThreadsClause = [&Builder](Value *Clause) { ++ if (Clause) ++ Clause = Builder.CreateIntCast(Clause, Builder.getInt32Ty(), ++ /*isSigned=*/false); ++ return Clause; ++ }; ++ ++ auto CombineMaxThreadsClauses = [&Builder](Value *Clause, Value *&Result) { ++ if (Clause) ++ Result = ++ Result ? Builder.CreateSelect(Builder.CreateICmpULT(Result, Clause), + Result, Clause) + : Clause; -+ }; ++ }; ++ ++ // TODO: Check if this is the correct handling for multi-dim thread_limit. ++ SmallVector NumThreadsC; ++ Value *MaxThreadsClause = InitMaxThreadsClause(RuntimeBounds.MaxThreads); ++ ++ for (auto [RtTeamsThreadLimit, RtTargetThreadLimit] : llvm::zip_equal( ++ RuntimeBounds.TeamsThreadLimit, RuntimeBounds.TargetThreadLimit)) { ++ Value *TeamsThreadLimitClause = InitMaxThreadsClause(RtTeamsThreadLimit); ++ Value *NumThreads = InitMaxThreadsClause(RtTargetThreadLimit); ++ ++ CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads); ++ CombineMaxThreadsClauses(MaxThreadsClause, NumThreads); + -+ Value *MaxThreadsClause = InitMaxThreadsClause(RuntimeBounds.MaxThreads); -+ Value *TeamsThreadLimitClause = -+ InitMaxThreadsClause(RuntimeBounds.TeamsThreadLimit); -+ Value *NumThreads = InitMaxThreadsClause(RuntimeBounds.TargetThreadLimit); -+ CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads); -+ CombineMaxThreadsClauses(MaxThreadsClause, NumThreads); ++ NumThreadsC.push_back(NumThreads ? 
NumThreads : Builder.getInt32(0)); ++ } ++ ++ unsigned NumTargetItems = Info.NumberOfPtrs; ++ // TODO: Use correct device ID ++ Value *DeviceID = Builder.getInt64(OMP_DEVICEID_UNDEF); ++ uint32_t SrcLocStrSize; ++ Constant *SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); ++ Value *RTLoc = OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize, ++ llvm::omp::IdentFlag(0), 0); ++ ++ Value *TripCount = RuntimeBounds.LoopTripCount ++ ? Builder.CreateIntCast(RuntimeBounds.LoopTripCount, ++ Builder.getInt64Ty(), ++ /*isSigned=*/false) ++ : Builder.getInt64(0); ++ ++ // TODO: Use correct DynCGGroupMem ++ Value *DynCGGroupMem = Builder.getInt32(0); ++ ++ KArgs = OpenMPIRBuilder::TargetKernelArgs(NumTargetItems, RTArgs, TripCount, ++ NumTeamsC, NumThreadsC, ++ DynCGGroupMem, HasNoWait); ++ ++ // The presence of certain clauses on the target directive require the ++ // explicit generation of the target task. ++ OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = [&]() { ++ if (RequiresOuterTargetTask) ++ return OMPBuilder.emitTargetTask(TaskBodyCB, DeviceID, RTLoc, AllocaIP, ++ Dependencies, HasNoWait); ++ ++ return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID, ++ EmitTargetCallFallbackCB, KArgs, ++ DeviceID, RTLoc, AllocaIP); ++ }(); + -+ if (!NumThreads) -+ NumThreads = Builder.getInt32(0); ++ // Assume no error was returned because TaskBodyCB and ++ // EmitTargetCallFallbackCB don't produce any. The 'if' check enables ++ // accessing the returned value. ++ if (AfterIP) ++ Builder.restoreIP(*AfterIP); ++ }; + - // TODO: Use correct DynCGGroupMem - Value *DynCGGroupMem = Builder.getInt32(0); ++ // If we don't have an ID for the target region, it means an offload entry ++ // wasn't created. In this case we just run the host fallback directly. 
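`InitMaxThreadsClause` and `CombineMaxThreadsClauses` above encode a simple rule: the per-dimension thread bound is the minimum of whichever of the target-level `thread_limit`, teams-level `thread_limit` and max-threads values are present, and 0 ("let the runtime decide") when none are. The same rule over plain integers, as a hedged standalone illustration:

```c++
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <optional>

// Combine an optional clause value into the running result, keeping the
// smaller of the two, just as CombineMaxThreadsClauses does with
// CreateICmpULT/CreateSelect on IR values.
static void combineClause(std::optional<uint32_t> Clause,
                          std::optional<uint32_t> &Result) {
  if (!Clause)
    return;
  Result = Result ? std::min(*Result, *Clause) : *Clause;
}

static uint32_t numThreadsFor(std::optional<uint32_t> TargetThreadLimit,
                              std::optional<uint32_t> TeamsThreadLimit,
                              std::optional<uint32_t> MaxThreads) {
  std::optional<uint32_t> NumThreads = TargetThreadLimit;
  combineClause(TeamsThreadLimit, NumThreads);
  combineClause(MaxThreads, NumThreads);
  return NumThreads.value_or(0); // 0 == no clause specified
}

int main() {
  std::cout << numThreadsFor({}, {}, {}) << "\n";    // 0: runtime picks
  std::cout << numThreadsFor(256, {}, {}) << "\n";   // 256
  std::cout << numThreadsFor(256, 128, 512) << "\n"; // 128: smallest clause wins
  return 0;
}
```

Returning 0 when no clause is present leaves the choice of block size entirely to the device runtime, which matches the comment in the hunk above.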
++ if (!OutlinedFnID) { ++ EmitTargetCallElse(); + return; + } + +- OpenMPIRBuilder::TargetDataInfo Info( +- /*RequiresDevicePointerInfo=*/false, +- /*SeparateBeginEndCalls=*/true); +- +- OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP()); +- OpenMPIRBuilder::TargetDataRTArgs RTArgs; +- OMPBuilder.emitOffloadingArraysAndArgs(AllocaIP, Builder.saveIP(), Info, +- RTArgs, MapInfo, +- /*IsNonContiguous=*/true, +- /*ForEndCall=*/false); - - bool HasNoWait = false; - -- OpenMPIRBuilder::TargetKernelArgs KArgs(NumTargetItems, RTArgs, NumIterations, -- NumTeamsVal, NumThreadsVal, -- DynCGGroupMem, HasNoWait); -+ OpenMPIRBuilder::TargetKernelArgs KArgs(NumTargetItems, RTArgs, TripCount, -+ NumTeams, NumThreads, DynCGGroupMem, -+ HasNoWait); - - Builder.restoreIP(OMPBuilder.emitKernelLaunch( - Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, KArgs, -@@ -5311,10 +6567,11 @@ +- SmallVector NumTeamsC; +- SmallVector NumThreadsC; +- for (auto V : NumTeams) +- NumTeamsC.push_back(llvm::ConstantInt::get(Builder.getInt32Ty(), V)); +- for (auto V : NumThreads) +- NumThreadsC.push_back(llvm::ConstantInt::get(Builder.getInt32Ty(), V)); +- +- unsigned NumTargetItems = Info.NumberOfPtrs; +- // TODO: Use correct device ID +- Value *DeviceID = Builder.getInt64(OMP_DEVICEID_UNDEF); +- uint32_t SrcLocStrSize; +- Constant *SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); +- Value *RTLoc = OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize, +- llvm::omp::IdentFlag(0), 0); +- // TODO: Use correct NumIterations +- Value *NumIterations = Builder.getInt64(0); +- // TODO: Use correct DynCGGroupMem +- Value *DynCGGroupMem = Builder.getInt32(0); +- +- KArgs = OpenMPIRBuilder::TargetKernelArgs( +- NumTargetItems, RTArgs, NumIterations, NumTeamsC, NumThreadsC, +- DynCGGroupMem, HasNoWait); +- +- // The presence of certain clauses on the target directive require the +- // explicit generation of the target task. +- OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = [&]() { +- if (RequiresOuterTargetTask) +- return OMPBuilder.emitTargetTask(TaskBodyCB, DeviceID, RTLoc, AllocaIP, +- Dependencies, HasNoWait); +- +- return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID, +- EmitTargetCallFallbackCB, KArgs, +- DeviceID, RTLoc, AllocaIP); +- }(); ++ // If there's no IF clause, only generate the kernel launch code path. ++ if (!IfCond) { ++ EmitTargetCallThen(); ++ return; ++ } + +- // Assume no error was returned because TaskBodyCB and +- // EmitTargetCallFallbackCB don't produce any. The 'if' check enables +- // accessing the returned value. +- if (AfterIP) +- Builder.restoreIP(*AfterIP); ++ // Create if-else to handle IF clause. ++ llvm::BasicBlock *ThenBlock = ++ BasicBlock::Create(Builder.getContext(), "omp_if.then"); ++ llvm::BasicBlock *ElseBlock = ++ BasicBlock::Create(Builder.getContext(), "omp_if.else"); ++ llvm::BasicBlock *ContBlock = ++ BasicBlock::Create(Builder.getContext(), "omp_if.end"); ++ Builder.CreateCondBr(IfCond, ThenBlock, ElseBlock); ++ ++ Function *CurFn = Builder.GetInsertBlock()->getParent(); ++ ++ // Emit the 'then' code. ++ OMPBuilder.emitBlock(ThenBlock, CurFn); ++ EmitTargetCallThen(); ++ OMPBuilder.emitBranch(ContBlock); ++ // Emit the 'else' code. ++ OMPBuilder.emitBlock(ElseBlock, CurFn); ++ EmitTargetCallElse(); ++ OMPBuilder.emitBranch(ContBlock); ++ // Emit the continuation block. 
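Together, `EmitTargetCallThen`, `EmitTargetCallElse` and the checks above form a three-way decision: no offload entry means host fallback only, no `if` clause means an unconditional kernel launch, and otherwise an `omp_if.then`/`omp_if.else`/`omp_if.end` diamond selects between the two at run time. A plain C++ sketch of that decision logic (host-side analogy; the function names are hypothetical):

```c++
#include <iostream>

// Host-side analogy of emitTargetCall's structure: the two lambdas stand in
// for the code emitted into omp_if.then / omp_if.else, and the booleans stand
// in for OutlinedFnID and the evaluated IF-clause condition.
static void targetCallSketch(bool HasOffloadEntry, bool HasIfClause,
                             bool IfCondValue) {
  auto EmitTargetCallThen = [] { std::cout << "launch device kernel\n"; };
  auto EmitTargetCallElse = [] { std::cout << "run host fallback\n"; };

  if (!HasOffloadEntry) { // no ID for the region: fall back directly
    EmitTargetCallElse();
    return;
  }
  if (!HasIfClause) { // unconditional launch path
    EmitTargetCallThen();
    return;
  }
  if (IfCondValue) // the emitted IR does this with a conditional branch
    EmitTargetCallThen();
  else
    EmitTargetCallElse();
}

int main() {
  targetCallSketch(false, false, false); // host fallback
  targetCallSketch(true, false, false);  // kernel launch
  targetCallSketch(true, true, false);   // IF clause evaluated false
  return 0;
}
```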
++ OMPBuilder.emitBlock(ContBlock, CurFn, /*IsFinished=*/true); } - OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTarget( -- const LocationDescription &Loc, InsertPointTy AllocaIP, -- InsertPointTy CodeGenIP, TargetRegionEntryInfo &EntryInfo, int32_t NumTeams, -- int32_t NumThreads, SmallVectorImpl &Args, -- GenMapInfoCallbackTy GenMapInfoCB, -+ const LocationDescription &Loc, bool IsSPMD, InsertPointTy AllocaIP, -+ InsertPointTy CodeGenIP, TargetRegionEntryInfo &EntryInfo, + OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTarget( +- const LocationDescription &Loc, bool IsOffloadEntry, InsertPointTy AllocaIP, +- InsertPointTy CodeGenIP, TargetRegionEntryInfo &EntryInfo, +- ArrayRef NumTeams, ArrayRef NumThreads, ++ const LocationDescription &Loc, bool IsSPMD, bool IsOffloadEntry, ++ Value *IfCond, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ++ TargetRegionEntryInfo &EntryInfo, + const TargetKernelDefaultBounds &DefaultBounds, + const TargetKernelRuntimeBounds &RuntimeBounds, -+ SmallVectorImpl &Args, GenMapInfoCallbackTy GenMapInfoCB, + SmallVectorImpl &Args, GenMapInfoCallbackTy GenMapInfoCB, OpenMPIRBuilder::TargetBodyGenCallbackTy CBFunc, - OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB) { - if (!updateToLocation(Loc)) -@@ -5324,11 +6581,12 @@ - - Function *OutlinedFn; - Constant *OutlinedFnID; -- emitTargetOutlinedFunction(*this, Builder, EntryInfo, OutlinedFn, -- OutlinedFnID, Args, CBFunc, ArgAccessorFuncCB); -+ emitTargetOutlinedFunction(*this, Builder, IsSPMD, EntryInfo, DefaultBounds, -+ OutlinedFn, OutlinedFnID, Args, CBFunc, -+ ArgAccessorFuncCB); + OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB, +@@ -7474,16 +7708,17 @@ + // the target region itself is generated using the callbacks CBFunc + // and ArgAccessorFuncCB + if (Error Err = emitTargetOutlinedFunction( +- *this, Builder, IsOffloadEntry, EntryInfo, OutlinedFn, OutlinedFnID, +- Args, CBFunc, ArgAccessorFuncCB)) ++ *this, Builder, IsSPMD, IsOffloadEntry, EntryInfo, DefaultBounds, ++ OutlinedFn, OutlinedFnID, Args, CBFunc, ArgAccessorFuncCB)) + return Err; + + // If we are not on the target device, then we need to generate code + // to make a remote call (offload) to the previously outlined function + // that represents the target region. Do that now. 
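The `InsertPointOrErrorTy` plumbing used throughout this hunk (`if (Error Err = ...) return Err;`, `takeError()`, and `assert(AfterIP && ...)` in callers whose callbacks cannot fail) follows LLVM's usual `Expected<T>`/`Error` pattern. A small self-contained sketch of that pattern, deliberately unrelated to the builder itself:

```c++
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Callees return Expected values; callers either forward the error with
// takeError() or unwrap the value once it is known to be present.
static Expected<int> mayFail(bool Fail) {
  if (Fail)
    return createStringError(inconvertibleErrorCode(),
                             "body generation failed");
  return 42;
}

static Expected<int> caller(bool Fail) {
  Expected<int> V = mayFail(Fail);
  if (!V)
    return V.takeError(); // mirrors `if (!AfterIP) return AfterIP.takeError();`
  return *V + 1;
}

int main() {
  if (Expected<int> R = caller(false))
    outs() << "ok: " << *R << "\n";
  else
    logAllUnhandledErrors(R.takeError(), errs(), "error: ");
  return 0;
}
```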
if (!Config.isTargetDevice()) - emitTargetCall(*this, Builder, AllocaIP, OutlinedFn, OutlinedFnID, NumTeams, -- NumThreads, Args, GenMapInfoCB); +- NumThreads, Args, GenMapInfoCB, Dependencies, HasNowait); + emitTargetCall(*this, Builder, AllocaIP, DefaultBounds, RuntimeBounds, -+ OutlinedFn, OutlinedFnID, Args, GenMapInfoCB); - ++ OutlinedFn, OutlinedFnID, Args, IfCond, GenMapInfoCB, ++ Dependencies, HasNowait); return Builder.saveIP(); } -@@ -6360,6 +7618,43 @@ - } - OpenMPIRBuilder::InsertPointTy +@@ -8590,6 +8825,44 @@ + + return Builder.saveIP(); + } ++ ++OpenMPIRBuilder::InsertPointOrErrorTy +OpenMPIRBuilder::createDistribute(const LocationDescription &Loc, + InsertPointTy OuterAllocaIP, + BodyGenCallbackTy BodyGenCB) { @@ -12105,7 +11724,8 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu + // Generate the body of distribute clause + InsertPointTy AllocaIP(AllocaBB, AllocaBB->begin()); + InsertPointTy CodeGenIP(BodyBB, BodyBB->begin()); -+ BodyGenCB(AllocaIP, CodeGenIP); ++ if (Error Err = BodyGenCB(AllocaIP, CodeGenIP)) ++ return Err; + + OutlineInfo OI; + OI.OuterAllocaBB = OuterAllocaIP.getBlock(); @@ -12117,123 +11737,40 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Frontend/OpenMP/OMPIRBu + + return Builder.saveIP(); +} -+ -+OpenMPIRBuilder::InsertPointTy + + OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTeams(const LocationDescription &Loc, - BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower, - Value *NumTeamsUpper, Value *ThreadLimit, -@@ -6488,7 +7783,6 @@ - Builder.CreateCall(getOrCreateRuntimeFunctionPtr( - omp::RuntimeFunction::OMPRTL___kmpc_fork_teams), - Args); -- - while (!ToBeDeleted.empty()) { - ToBeDeleted.top()->eraseFromParent(); - ToBeDeleted.pop(); -diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Transforms/IPO/OpenMPOpt.cpp llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp ---- llvm-project.orig/llvm/lib/Transforms/IPO/OpenMPOpt.cpp 2024-06-12 10:43:13.628199897 -0500 -+++ llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp 2024-06-12 10:44:09.355614196 -0500 -@@ -287,6 +287,20 @@ - OpenMPPostLink(OpenMPPostLink) { - - OMPBuilder.Config.IsTargetDevice = isOpenMPDevice(OMPBuilder.M); -+ const Triple T(OMPBuilder.M.getTargetTriple()); -+ switch (T.getArch()) { -+ case llvm::Triple::nvptx: -+ case llvm::Triple::nvptx64: -+ case llvm::Triple::amdgcn: -+ assert(OMPBuilder.Config.IsTargetDevice && -+ "OpenMP AMDGPU/NVPTX is only prepared to deal with device code."); -+ OMPBuilder.Config.IsGPU = true; -+ break; -+ default: -+ OMPBuilder.Config.IsGPU = false; -+ break; -+ } -+ - OMPBuilder.initialize(); - initializeRuntimeFunctions(M); - initializeInternalControlVars(); -@@ -535,6 +549,7 @@ - void recollectUses() { - for (int Idx = 0; Idx < RFIs.size(); ++Idx) - recollectUsesForFunction(static_cast(Idx)); -+ OMPBuilder.Config.IsTargetDevice = isOpenMPDevice(OMPBuilder.M); +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/llvm/lib/Transforms/Utils/CodeExtractor.cpp llvm-project-aso/llvm/lib/Transforms/Utils/CodeExtractor.cpp +--- llvm-project-aso-orig/llvm/lib/Transforms/Utils/CodeExtractor.cpp 2024-11-23 20:25:27.183273941 -0600 ++++ llvm-project-aso/llvm/lib/Transforms/Utils/CodeExtractor.cpp 2024-11-23 20:39:47.196175308 -0600 +@@ -1801,7 +1801,7 @@ + ReloadOutputs.push_back(alloca); } - // Helper function to inherit the calling convention of the function callee. 
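The OpenMPOpt.cpp lines above key `Config.IsGPU` off the module's target triple (nvptx, nvptx64, amdgcn). A standalone sketch of that check using `llvm::Triple`; the triples in `main` are examples only:

```c++
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"

using namespace llvm;

// Returns true for the GPU architectures the OpenMP device runtime targets.
static bool isOpenMPGPUTriple(StringRef TT) {
  Triple T(TT);
  switch (T.getArch()) {
  case Triple::nvptx:
  case Triple::nvptx64:
  case Triple::amdgcn:
    return true;
  default:
    return false;
  }
}

int main() {
  for (StringRef TT : {"amdgcn-amd-amdhsa", "nvptx64-nvidia-cuda",
                       "x86_64-unknown-linux-gnu"})
    outs() << TT << " -> " << (isOpenMPGPUTriple(TT) ? "GPU" : "host") << "\n";
  return 0;
}
```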
-diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/lib/Transforms/Utils/CodeExtractor.cpp llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp ---- llvm-project.orig/llvm/lib/Transforms/Utils/CodeExtractor.cpp 2024-06-12 10:43:13.660199561 -0500 -+++ llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp 2024-06-12 10:44:09.355614196 -0500 -@@ -1196,6 +1196,7 @@ - - StructType *StructArgTy = nullptr; - AllocaInst *Struct = nullptr; -+ Instruction *StructSpaceCast = nullptr; - unsigned NumAggregatedInputs = 0; - if (AggregateArgs && !StructValues.empty()) { - std::vector ArgTypes; -@@ -1214,20 +1215,34 @@ +- AllocaInst *Struct = nullptr; ++ Instruction *Struct = nullptr; + if (!StructValues.empty()) { + Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, + "structArg", AllocaBlock->getFirstInsertionPt()); +@@ -1809,11 +1809,11 @@ + auto *StructSpaceCast = new AddrSpaceCastInst( Struct, PointerType ::get(Context, 0), "structArg.ascast"); StructSpaceCast->insertAfter(Struct); - params.push_back(StructSpaceCast); -+ // Store aggregated inputs in the struct. -+ for (unsigned i = 0, e = StructValues.size(); i != e; ++i) { -+ if (inputs.contains(StructValues[i])) { -+ Value *Idx[2]; -+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); -+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); -+ GetElementPtrInst *GEP = -+ GetElementPtrInst::Create(StructArgTy, StructSpaceCast, Idx, -+ "gep_" + StructValues[i]->getName()); -+ GEP->insertInto(codeReplacer, codeReplacer->end()); -+ new StoreInst(StructValues[i], GEP, codeReplacer); -+ NumAggregatedInputs++; -+ } -+ } - } else { - params.push_back(Struct); -- } -- // Store aggregated inputs in the struct. -- for (unsigned i = 0, e = StructValues.size(); i != e; ++i) { -- if (inputs.contains(StructValues[i])) { -- Value *Idx[2]; -- Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); -- Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); -- GetElementPtrInst *GEP = GetElementPtrInst::Create( -- StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); -- GEP->insertInto(codeReplacer, codeReplacer->end()); -- new StoreInst(StructValues[i], GEP, codeReplacer); -- NumAggregatedInputs++; -+ // Store aggregated inputs in the struct. -+ for (unsigned i = 0, e = StructValues.size(); i != e; ++i) { -+ if (inputs.contains(StructValues[i])) { -+ Value *Idx[2]; -+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); -+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); -+ GetElementPtrInst *GEP = GetElementPtrInst::Create( -+ StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); -+ GEP->insertInto(codeReplacer, codeReplacer->end()); -+ new StoreInst(StructValues[i], GEP, codeReplacer); -+ NumAggregatedInputs++; -+ } - } +- params.push_back(StructSpaceCast); +- } else { +- params.push_back(Struct); ++ Struct = StructSpaceCast; } - } -@@ -1262,7 +1277,8 @@ - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), aggIdx); - GetElementPtrInst *GEP = GetElementPtrInst::Create( -- StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); -+ StructArgTy, StructSpaceCast ? 
StructSpaceCast : Struct, Idx, -+ "gep_reload_" + outputs[i]->getName()); - GEP->insertInto(codeReplacer, codeReplacer->end()); - Output = GEP; - ++aggIdx; -diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp llvm-project/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp ---- llvm-project.orig/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp 2024-06-12 10:43:15.156183831 -0500 -+++ llvm-project/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp 2024-06-12 10:44:09.359614154 -0500 -@@ -600,6 +600,7 @@ + ++ params.push_back(Struct); ++ + unsigned AggIdx = 0; + for (Value *input : inputs) { + if (!StructValues.contains(input)) +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp llvm-project-aso/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +--- llvm-project-aso-orig/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp 2024-10-29 11:07:19.981633529 -0500 ++++ llvm-project-aso/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp 2024-11-23 20:39:47.196175308 -0600 +@@ -629,6 +629,7 @@ "256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"); OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = true; @@ -12241,7 +11778,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -707,6 +708,7 @@ +@@ -741,6 +742,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12249,7 +11786,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -812,6 +814,7 @@ +@@ -851,6 +853,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12257,7 +11794,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -906,6 +909,7 @@ +@@ -951,6 +954,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12265,7 +11802,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -1015,6 +1019,7 @@ +@@ -1068,6 +1072,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12273,7 +11810,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -1120,6 +1125,7 @@ +@@ -1176,6 +1181,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12281,7 +11818,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -1234,6 +1240,7 @@ +@@ -1298,6 +1304,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) { OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12289,7 +11826,37 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -2237,6 +2244,7 @@ +@@ -1420,8 +1427,7 @@ + 
EXPECT_EQ(&Loop->getAfter()->front(), RetInst); + } + +-TEST_F(OpenMPIRBuilderTest, CanonicalLoopBounds) { +- using InsertPointTy = OpenMPIRBuilder::InsertPointTy; ++TEST_F(OpenMPIRBuilderTest, CanonicalLoopTripCount) { + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + IRBuilder<> Builder(BB); +@@ -1437,17 +1443,8 @@ + Value *StartVal = ConstantInt::get(LCTy, Start); + Value *StopVal = ConstantInt::get(LCTy, Stop); + Value *StepVal = ConstantInt::get(LCTy, Step); +- auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) { +- return Error::success(); +- }; +- Expected LoopResult = +- OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal, +- StepVal, IsSigned, InclusiveStop); +- assert(LoopResult && "unexpected error"); +- CanonicalLoopInfo *Loop = *LoopResult; +- Loop->assertOK(); +- Builder.restoreIP(Loop->getAfterIP()); +- Value *TripCount = Loop->getTripCount(); ++ Value *TripCount = OMPBuilder.calculateCanonicalLoopTripCount( ++ Loc, StartVal, StopVal, StepVal, IsSigned, InclusiveStop); + return cast(TripCount)->getValue().getZExtValue(); + }; + +@@ -2332,6 +2329,7 @@ "256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"); OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = true; @@ -12297,7 +11864,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); IRBuilder<> Builder(BB); OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); -@@ -2291,14 +2299,21 @@ +@@ -2389,14 +2387,21 @@ // Check that no variables except for loop counter are used in loop body EXPECT_EQ(Constant::getNullValue(Builder.getPtrTy()), WorkshareLoopRuntimeCall->getArgOperand(2)); @@ -12321,7 +11888,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); IRBuilder<> Builder(BB); OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); -@@ -2400,6 +2415,7 @@ +@@ -2503,6 +2508,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12329,7 +11896,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR BasicBlock *Body; CallInst *Call; -@@ -2475,6 +2491,7 @@ +@@ -2579,6 +2585,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12337,7 +11904,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); IRBuilder<> Builder(BB); OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); -@@ -2633,6 +2650,7 @@ +@@ -2742,6 +2749,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12345,7 +11912,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); IRBuilder<> Builder(BB); OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); -@@ -4336,6 +4354,7 @@ +@@ -4481,6 +4489,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12353,7 +11920,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -4412,6 +4431,7 @@ +@@ -4560,6 +4569,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice 
= false; @@ -12361,7 +11928,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> &Builder = OMPBuilder.Builder; -@@ -4463,6 +4483,7 @@ +@@ -4613,6 +4623,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12369,7 +11936,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> &Builder = OMPBuilder.Builder; -@@ -4515,6 +4536,7 @@ +@@ -4669,6 +4680,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12377,7 +11944,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> &Builder = OMPBuilder.Builder; -@@ -4570,6 +4592,7 @@ +@@ -4727,6 +4739,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12385,7 +11952,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> &Builder = OMPBuilder.Builder; -@@ -4630,6 +4653,7 @@ +@@ -4790,6 +4803,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12393,7 +11960,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> &Builder = OMPBuilder.Builder; -@@ -4689,6 +4713,7 @@ +@@ -4852,6 +4866,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12401,7 +11968,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> &Builder = OMPBuilder.Builder; -@@ -4887,6 +4912,7 @@ +@@ -5053,6 +5068,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12409,7 +11976,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -5122,6 +5148,7 @@ +@@ -5298,6 +5314,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12417,18 +11984,27 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -5972,7 +5999,9 @@ +@@ -6182,9 +6199,17 @@ + TargetRegionEntryInfo EntryInfo("func", 42, 4711, 17); OpenMPIRBuilder::LocationDescription OmpLoc({Builder.saveIP(), DL}); - Builder.restoreIP(OMPBuilder.createTarget( -- OmpLoc, Builder.saveIP(), Builder.saveIP(), EntryInfo, -1, 0, Inputs, -+ OmpLoc, /*IsSPMD=*/false, Builder.saveIP(), Builder.saveIP(), EntryInfo, -+ /*DefaultBounds=*/OpenMPIRBuilder::TargetKernelDefaultBounds(), -+ /*RuntimeBounds=*/OpenMPIRBuilder::TargetKernelRuntimeBounds(), Inputs, - GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB)); ++ OpenMPIRBuilder::TargetKernelDefaultBounds DefaultBounds; ++ DefaultBounds.MaxTeams.push_back(-1); ++ DefaultBounds.MaxThreads.push_back(-1); ++ OpenMPIRBuilder::TargetKernelRuntimeBounds RuntimeBounds; ++ 
RuntimeBounds.TargetThreadLimit.push_back(nullptr); ++ RuntimeBounds.TeamsThreadLimit.push_back(nullptr); ++ RuntimeBounds.MaxTeams.push_back(nullptr); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTarget( +- OmpLoc, /*IsOffloadEntry=*/true, Builder.saveIP(), Builder.saveIP(), +- EntryInfo, -1, 0, Inputs, GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB); ++ OmpLoc, /*IsSPMD=*/false, /*IsOffloadEntry=*/true, /*IfCond=*/nullptr, ++ Builder.saveIP(), Builder.saveIP(), EntryInfo, DefaultBounds, ++ RuntimeBounds, Inputs, GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); OMPBuilder.finalize(); - Builder.CreateRetVoid(); -@@ -6012,6 +6041,7 @@ +@@ -6229,6 +6254,7 @@ } TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) { @@ -12436,23 +12012,30 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.setConfig( OpenMPIRBuilderConfig(true, false, false, false, false, false, false)); -@@ -6075,10 +6105,11 @@ +@@ -6292,11 +6318,17 @@ TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2, /*Line=*/3, /*Count=*/0); -- Builder.restoreIP( -- OMPBuilder.createTarget(Loc, EntryIP, EntryIP, EntryInfo, /*NumTeams=*/-1, +- OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = +- OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP, +- EntryInfo, /*NumTeams=*/-1, - /*NumThreads=*/0, CapturedArgs, GenMapInfoCB, -- BodyGenCB, SimpleArgAccessorCB)); -+ Builder.restoreIP(OMPBuilder.createTarget( -+ Loc, /*IsSPMD=*/false, EntryIP, EntryIP, EntryInfo, -+ /*DefaultBounds=*/OpenMPIRBuilder::TargetKernelDefaultBounds(), -+ /*RuntimeBounds=*/OpenMPIRBuilder::TargetKernelRuntimeBounds(), -+ CapturedArgs, GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB)); - - Builder.CreateRetVoid(); - OMPBuilder.finalize(); -@@ -6158,6 +6189,7 @@ +- BodyGenCB, SimpleArgAccessorCB); ++ OpenMPIRBuilder::TargetKernelDefaultBounds DefaultBounds; ++ DefaultBounds.MaxTeams.push_back(-1); ++ DefaultBounds.MaxThreads.push_back(-1); ++ OpenMPIRBuilder::TargetKernelRuntimeBounds RuntimeBounds; ++ RuntimeBounds.TargetThreadLimit.push_back(nullptr); ++ RuntimeBounds.TeamsThreadLimit.push_back(nullptr); ++ RuntimeBounds.MaxTeams.push_back(nullptr); ++ OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTarget( ++ Loc, /*IsSPMD=*/false, /*IsOffloadEntry=*/true, /*IfCond=*/nullptr, ++ EntryIP, EntryIP, EntryInfo, DefaultBounds, RuntimeBounds, CapturedArgs, ++ GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); + +@@ -6378,6 +6410,7 @@ } TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) { @@ -12460,23 +12043,30 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.setConfig( OpenMPIRBuilderConfig(true, false, false, false, false, false, false)); -@@ -6223,10 +6255,11 @@ +@@ -6443,11 +6476,17 @@ TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2, /*Line=*/3, /*Count=*/0); -- Builder.restoreIP( -- OMPBuilder.createTarget(Loc, EntryIP, EntryIP, EntryInfo, /*NumTeams=*/-1, +- OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = +- OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP, +- EntryInfo, /*NumTeams=*/-1, - /*NumThreads=*/0, CapturedArgs, GenMapInfoCB, -- BodyGenCB, SimpleArgAccessorCB)); -+ Builder.restoreIP(OMPBuilder.createTarget( -+ Loc, /*IsSPMD=*/false, EntryIP, EntryIP, EntryInfo, -+ 
/*DefaultBounds=*/OpenMPIRBuilder::TargetKernelDefaultBounds(), -+ /*RuntimeBounds=*/OpenMPIRBuilder::TargetKernelRuntimeBounds(), -+ CapturedArgs, GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB)); - - Builder.CreateRetVoid(); - OMPBuilder.finalize(); -@@ -6311,6 +6344,7 @@ +- BodyGenCB, SimpleArgAccessorCB); ++ OpenMPIRBuilder::TargetKernelDefaultBounds DefaultBounds; ++ DefaultBounds.MaxTeams.push_back(-1); ++ DefaultBounds.MaxThreads.push_back(-1); ++ OpenMPIRBuilder::TargetKernelRuntimeBounds RuntimeBounds; ++ RuntimeBounds.TargetThreadLimit.push_back(nullptr); ++ RuntimeBounds.TeamsThreadLimit.push_back(nullptr); ++ RuntimeBounds.MaxTeams.push_back(nullptr); ++ OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTarget( ++ Loc, /*IsSPMD=*/false, /*IsOffloadEntry=*/true, /*IfCond=*/nullptr, ++ EntryIP, EntryIP, EntryInfo, DefaultBounds, RuntimeBounds, CapturedArgs, ++ GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); + +@@ -6534,6 +6573,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12484,7 +12074,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -6440,6 +6474,7 @@ +@@ -6665,6 +6705,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12492,7 +12082,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -6471,6 +6506,7 @@ +@@ -6699,6 +6740,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12500,7 +12090,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -6501,6 +6537,7 @@ +@@ -6733,6 +6775,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12508,7 +12098,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -6575,6 +6612,7 @@ +@@ -6811,6 +6854,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12516,7 +12106,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -6629,6 +6667,7 @@ +@@ -6870,6 +6914,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12524,7 +12114,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -@@ -6790,6 +6829,7 @@ +@@ -7037,6 +7082,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = false; @@ -12532,192 +12122,292 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/llvm/unittests/Frontend/OpenMPIR OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td 
llvm-project/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td ---- llvm-project.orig/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td 2024-06-12 10:43:15.228183074 -0500 -+++ llvm-project/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td 2024-06-12 10:44:09.359614154 -0500 -@@ -102,8 +102,14 @@ - - Operation &firstOp = *r.op_begin(); - Operation &secondOp = *(std::next(r.op_begin())); -- return ::llvm::isa(firstOp) && -- secondOp.hasTrait(); -+ -+ if (!secondOp.hasTrait()) -+ return false; -+ -+ if (auto wrapper = ::llvm::dyn_cast(firstOp)) -+ return wrapper.isWrapper(); -+ -+ return ::llvm::isa(firstOp); - }] - >, - InterfaceMethod< -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td llvm-project/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td ---- llvm-project.orig/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td 2024-06-12 10:43:15.228183074 -0500 -+++ llvm-project/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td 2024-06-12 10:44:09.359614154 -0500 -@@ -467,7 +467,7 @@ - def WsloopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments, - DeclareOpInterfaceMethods, - RecursiveMemoryEffects, ReductionClauseInterface, -- SingleBlockImplicitTerminator<"TerminatorOp">]> { -+ SingleBlock]> { - let summary = "worksharing-loop construct"; - let description = [{ - The worksharing-loop construct specifies that the iterations of the loop(s) -@@ -577,8 +577,7 @@ - - def SimdOp : OpenMP_Op<"simd", [AttrSizedOperandSegments, - DeclareOpInterfaceMethods, -- RecursiveMemoryEffects, -- SingleBlockImplicitTerminator<"TerminatorOp">]> { -+ RecursiveMemoryEffects, SingleBlock]> { - let summary = "simd construct"; - let description = [{ - The simd construct can be applied to a loop to indicate that the loop can be -@@ -682,8 +681,7 @@ - //===----------------------------------------------------------------------===// - def DistributeOp : OpenMP_Op<"distribute", [AttrSizedOperandSegments, - DeclareOpInterfaceMethods, -- RecursiveMemoryEffects, -- SingleBlockImplicitTerminator<"TerminatorOp">]> { -+ RecursiveMemoryEffects, SingleBlock]> { - let summary = "distribute construct"; - let description = [{ - The distribute construct specifies that the iterations of one or more loops -@@ -856,7 +854,7 @@ - AutomaticAllocationScope, - DeclareOpInterfaceMethods, - RecursiveMemoryEffects, ReductionClauseInterface, -- SingleBlockImplicitTerminator<"TerminatorOp">]> { -+ SingleBlock]> { - let summary = "taskloop construct"; - let description = [{ - The taskloop construct specifies that the iterations of one or more -@@ -1567,13 +1565,16 @@ - - The optional $thread_limit specifies the limit on the number of threads +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/docs/Dialects/OpenMPDialect/_index.md llvm-project-aso/mlir/docs/Dialects/OpenMPDialect/_index.md +--- llvm-project-aso-orig/mlir/docs/Dialects/OpenMPDialect/_index.md 2024-10-18 17:40:33.824978757 -0500 ++++ llvm-project-aso/mlir/docs/Dialects/OpenMPDialect/_index.md 2024-11-23 20:39:47.200175294 -0600 +@@ -297,7 +297,8 @@ + introduction of private copies of the same underlying variable defined outside + the MLIR operation the clause is attached to. Currently, clauses with this + property can be classified into three main categories: +- - Map-like clauses: `map`, `use_device_addr` and `use_device_ptr`. ++ - Map-like clauses: `host_eval`, `map`, `use_device_addr` and ++`use_device_ptr`. + - Reduction-like clauses: `in_reduction`, `reduction` and `task_reduction`. + - Privatization clauses: `private`. 
+ +@@ -522,3 +523,58 @@ + omp.terminator + } {omp.composite} + ``` ++ ++## Host-Evaluated Clauses in Target Regions ++ ++The `omp.target` operation, which represents the OpenMP `target` construct, is ++marked with the `IsolatedFromAbove` trait. This means that, inside of its ++region, no MLIR values defined outside of the op itself can be used. This is ++consistent with the OpenMP specification of the `target` construct, which ++mandates that all host device values used inside of the `target` region must ++either be privatized (data-sharing) or mapped (data-mapping). ++ ++Normally, clauses applied to a construct are evaluated before entering that ++construct. Further, in some cases, the OpenMP specification stipulates that ++clauses be evaluated _on the host device_ on entry to a parent `target` ++construct. In particular, the `num_teams` and `thread_limit` clauses of the ++`teams` construct must be evaluated on the host device if it's nested inside or ++combined with a `target` construct. ++ ++Additionally, the runtime library targeted by the MLIR to LLVM IR translation of ++the OpenMP dialect supports the optimized launch of SPMD kernels (i.e. ++`target teams distribute parallel {do,for}` in OpenMP), which requires ++specifying in advance what the total trip count of the loop is. Consequently, it ++is also beneficial to evaluate the trip count on the host device prior to the ++kernel launch. ++ ++These host-evaluated values in MLIR would need to be placed outside of the ++`omp.target` region and also attached to the corresponding nested operations, ++which is not possible because of the `IsolatedFromAbove` trait. The solution ++implemented to address this problem has been to introduce the `host_eval` ++argument to the `omp.target` operation. It works similarly to a `map` clause, ++but its only intended use is to forward host-evaluated values to their ++corresponding operation inside of the region. Any uses outside of the previously ++described result in a verifier error. ++ ++```mlir ++// Initialize %0, %1, %2, %3... ++omp.target host_eval(%0 -> %nt, %1 -> %lb, %2 -> %ub, %3 -> %step : i32, i32, i32, i32) { ++ omp.teams num_teams(to %nt : i32) { ++ omp.parallel { ++ omp.distribute { ++ omp.wsloop { ++ omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) { ++ // ... ++ omp.yield ++ } ++ omp.terminator ++ } {omp.composite} ++ omp.terminator ++ } {omp.composite} ++ omp.terminator ++ } {omp.composite} ++ omp.terminator ++ } ++ omp.terminator ++} ++``` +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/include/mlir/Conversion/Passes.td llvm-project-aso/mlir/include/mlir/Conversion/Passes.td +--- llvm-project-aso-orig/mlir/include/mlir/Conversion/Passes.td 2024-09-24 18:07:10.079914647 -0500 ++++ llvm-project-aso/mlir/include/mlir/Conversion/Passes.td 2024-11-23 20:39:47.200175294 -0600 +@@ -754,7 +754,7 @@ + } -- The optional $nowait eliminates the implicit barrier so the parent task can make progress -- even if the target task is not yet completed. -+ The optional $trip_count indicates the total number of loop iterations, only if this -+ target region represents a single teams+distribute+parallel worksharing loop. + //===----------------------------------------------------------------------===// +-// MathToLibm ++// MathToROCDL + //===----------------------------------------------------------------------===// - The `depends` and `depend_vars` arguments are variadic lists of values - that specify the dependencies of this particular target task in relation to - other tasks. 
+ def ConvertMathToROCDL : Pass<"convert-math-to-rocdl", "ModuleOp"> { +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h llvm-project-aso/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h +--- llvm-project-aso-orig/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h 2024-09-13 09:46:39.630282131 -0500 ++++ llvm-project-aso/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h 2024-11-23 20:39:47.200175294 -0600 +@@ -41,6 +41,12 @@ + // Extra operation operand structures. + //===----------------------------------------------------------------------===// -+ The optional $nowait eliminates the implicit barrier so the parent task can make progress -+ even if the target task is not yet completed. ++/// Clauses that correspond to operations other than omp.target, but might have ++/// to be evaluated outside of a parent target region. ++using HostEvaluatedOperands = ++ detail::Clauses; + - The optional $is_device_ptr indicates list items are device pointers. + // TODO: Add `indirect` clause. + using DeclareTargetOperands = detail::Clauses; - The optional $has_device_addr indicates that list items already have device -@@ -1583,13 +1584,22 @@ - The optional $map_operands maps data from the task’s environment to the - device environment. +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td llvm-project-aso/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td +--- llvm-project-aso-orig/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td 2024-11-14 15:28:41.918639373 -0600 ++++ llvm-project-aso/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td 2024-11-23 20:39:47.200175294 -0600 +@@ -445,6 +445,44 @@ + def OpenMP_HintClause : OpenMP_HintClauseSkip<>; -- TODO: defaultmap, in_reduction -+ The optional $num_teams_lower, $num_teams_upper and $teams_thread_limit -+ arguments represent the corresponding arguments of a directly nested TeamsOp. They -+ can be only set in this operation when representing combined or composite constructs -+ that include TARGET and TEAMS, so that they can be evaluated in the host device. + //===----------------------------------------------------------------------===// ++// Not in the spec: Clause-like structure to hold host-evaluated values. ++//===----------------------------------------------------------------------===// + -+ The optional $num_threads argument represents the corresponding argument of a nested -+ ParallelOp, which is only allowed if this target region contains a single (possibly -+ multi-level) nest of OpenMP operations including a ParallelOp. 
- -+ TODO: defaultmap, in_reduction - }]; - - let arguments = (ins Optional:$if_expr, - Optional:$device, - Optional:$thread_limit, -+ Optional:$trip_count, - OptionalAttr:$depends, - Variadic:$depend_vars, - UnitAttr:$nowait, -@@ -1597,7 +1607,11 @@ - Variadic:$has_device_addr, - Variadic:$map_operands, - Variadic:$private_vars, -- OptionalAttr:$privatizers); -+ OptionalAttr:$privatizers, -+ Optional:$num_teams_lower, -+ Optional:$num_teams_upper, -+ Optional:$teams_thread_limit, -+ Optional:$num_threads); - - let regions = (region AnyRegion:$region); - -@@ -1609,16 +1623,36 @@ - oilist( `if` `(` $if_expr `)` - | `device` `(` $device `:` type($device) `)` - | `thread_limit` `(` $thread_limit `:` type($thread_limit) `)` -+ | `trip_count` `(` $trip_count `:` type($trip_count) `)` - | `nowait` $nowait - | `is_device_ptr` `(` $is_device_ptr `:` type($is_device_ptr) `)` - | `has_device_addr` `(` $has_device_addr `:` type($has_device_addr) `)` - | `map_entries` `(` custom($map_operands, type($map_operands)) `)` - | `private` `(` custom($private_vars, type($private_vars), $privatizers) `)` - | `depend` `(` custom($depend_vars, type($depend_vars), $depends) `)` -+ | `num_teams` `(` ( $num_teams_lower^ `:` type($num_teams_lower) )? `to` -+ $num_teams_upper `:` type($num_teams_upper) `)` -+ | `teams_thread_limit` `(` $teams_thread_limit `:` type($teams_thread_limit) `)` -+ | `num_threads` `(` $num_threads `:` type($num_threads) `)` - ) $region attr-dict - }]; - - let hasVerifier = 1; ++class OpenMP_HostEvalClauseSkip< ++ bit traits = false, bit arguments = false, bit assemblyFormat = false, ++ bit description = false, bit extraClassDeclaration = false ++ > : OpenMP_Clause { ++ let traits = [ ++ BlockArgOpenMPOpInterface ++ ]; ++ ++ let arguments = (ins ++ Variadic:$host_eval_vars ++ ); + + let extraClassDeclaration = [{ -+ /// Returns the innermost OpenMP dialect operation nested inside of this -+ /// operation's region. For an operation to be detected as captured, it must -+ /// be inside a (possibly multi-level) nest of OpenMP dialect operation's ++ unsigned numHostEvalBlockArgs() { ++ return getHostEvalVars().size(); ++ } ++ }]; ++ ++ let description = [{ ++ The optional `host_eval_vars` holds values defined outside of the region of ++ the `IsolatedFromAbove` operation for which a corresponding entry block ++ argument is defined. The only legal uses for these captured values are the ++ following: ++ - `num_teams` or `thread_limit` clause of an immediately nested ++ `omp.teams` operation. ++ - If the operation is the top-level `omp.target` of a target SPMD kernel: ++ - `num_threads` clause of the nested `omp.parallel` operation. ++ - Bounds and steps of the nested `omp.loop_nest` operation. ++ }]; ++} ++ ++def OpenMP_HostEvalClause : OpenMP_HostEvalClauseSkip<>; ++ ++//===----------------------------------------------------------------------===// + // V5.2: [3.4] `if` clause + //===----------------------------------------------------------------------===// + +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td llvm-project-aso/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td +--- llvm-project-aso-orig/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td 2024-10-18 17:40:33.840978593 -0500 ++++ llvm-project-aso/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td 2024-11-23 20:39:47.200175294 -0600 +@@ -25,6 +25,10 @@ + + let methods = [ + // Default-implemented methods to be overriden by the corresponding clauses. 
++ InterfaceMethod<"Get number of block arguments defined by `host_eval`.", ++ "unsigned", "numHostEvalBlockArgs", (ins), [{}], [{ ++ return 0; ++ }]>, + InterfaceMethod<"Get number of block arguments defined by `in_reduction`.", + "unsigned", "numInReductionBlockArgs", (ins), [{}], [{ + return 0; +@@ -55,9 +59,14 @@ + }]>, + + // Unified access methods for clause-associated entry block arguments. ++ InterfaceMethod<"Get start index of block arguments defined by `host_eval`.", ++ "unsigned", "getHostEvalBlockArgsStart", (ins), [{ ++ return 0; ++ }]>, + InterfaceMethod<"Get start index of block arguments defined by `in_reduction`.", + "unsigned", "getInReductionBlockArgsStart", (ins), [{ +- return 0; ++ auto iface = ::llvm::cast(*$_op); ++ return iface.getHostEvalBlockArgsStart() + $_op.numHostEvalBlockArgs(); + }]>, + InterfaceMethod<"Get start index of block arguments defined by `map`.", + "unsigned", "getMapBlockArgsStart", (ins), [{ +@@ -91,6 +100,13 @@ + return iface.getUseDeviceAddrBlockArgsStart() + $_op.numUseDeviceAddrBlockArgs(); + }]>, + ++ InterfaceMethod<"Get block arguments defined by `host_eval`.", ++ "::llvm::MutableArrayRef<::mlir::BlockArgument>", ++ "getHostEvalBlockArgs", (ins), [{ ++ auto iface = ::llvm::cast(*$_op); ++ return $_op->getRegion(0).getArguments().slice( ++ iface.getHostEvalBlockArgsStart(), $_op.numHostEvalBlockArgs()); ++ }]>, + InterfaceMethod<"Get block arguments defined by `in_reduction`.", + "::llvm::MutableArrayRef<::mlir::BlockArgument>", + "getInReductionBlockArgs", (ins), [{ +@@ -147,10 +163,11 @@ + + let verify = [{ + auto iface = ::llvm::cast($_op); +- unsigned expectedArgs = iface.numInReductionBlockArgs() + +- iface.numMapBlockArgs() + iface.numPrivateBlockArgs() + +- iface.numReductionBlockArgs() + iface.numTaskReductionBlockArgs() + +- iface.numUseDeviceAddrBlockArgs() + iface.numUseDevicePtrBlockArgs(); ++ unsigned expectedArgs = iface.numHostEvalBlockArgs() + ++ iface.numInReductionBlockArgs() + iface.numMapBlockArgs() + ++ iface.numPrivateBlockArgs() + iface.numReductionBlockArgs() + ++ iface.numTaskReductionBlockArgs() + iface.numUseDeviceAddrBlockArgs() + ++ iface.numUseDevicePtrBlockArgs(); + if ($_op->getRegion(0).getNumArguments() < expectedArgs) + return $_op->emitOpError() << "expected at least " << expectedArgs + << " entry block argument(s)"; +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td llvm-project-aso/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +--- llvm-project-aso-orig/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td 2024-11-23 20:25:27.479272877 -0600 ++++ llvm-project-aso/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td 2024-11-23 20:39:47.200175294 -0600 +@@ -1213,9 +1213,10 @@ + ], clauses = [ + // TODO: Complete clause list (defaultmap, uses_allocators). + OpenMP_AllocateClause, OpenMP_DependClause, OpenMP_DeviceClause, +- OpenMP_HasDeviceAddrClause, OpenMP_IfClause, OpenMP_InReductionClause, +- OpenMP_IsDevicePtrClause, OpenMP_MapClauseSkip, +- OpenMP_NowaitClause, OpenMP_PrivateClause, OpenMP_ThreadLimitClause ++ OpenMP_HasDeviceAddrClause, OpenMP_HostEvalClause, OpenMP_IfClause, ++ OpenMP_InReductionClause, OpenMP_IsDevicePtrClause, ++ OpenMP_MapClauseSkip, OpenMP_NowaitClause, ++ OpenMP_PrivateClause, OpenMP_ThreadLimitClause + ], singleRegion = true> { + let summary = "target construct"; + let description = [{ +@@ -1225,6 +1226,21 @@ + The optional `if_expr` parameter specifies a boolean result of a conditional + check. 
If this value is 1 or is not provided then the target region runs on + a device, if it is 0 then the target region is executed on the host device. ++ ++ The optional `trip_count` indicates the total number of loop iterations, ++ only if this target region represents a single TEAMS+DISTRIBUTE+PARALLEL ++ worksharing loop. ++ ++ The optional `num_teams_lower`, `num_teams_upper` and `teams_thread_limit` ++ arguments represent the corresponding arguments of a directly nested ++ `omp.teams`. They can be only set in this operation when representing ++ combined constructs that include TARGET and TEAMS, so that they can be ++ evaluated in the host device. ++ ++ The optional `num_threads` argument represents the corresponding argument of ++ a nested `omp.parallel`, which is only allowed if this target region ++ contains a single (possibly multi-level) nest of OpenMP operations including ++ an `omp.parallel`. + }] # clausesDescription; + + let builders = [ +@@ -1233,13 +1249,30 @@ + + let extraClassDeclaration = [{ + unsigned numMapBlockArgs() { return getMapVars().size(); } ++ ++ /// Returns the innermost OpenMP dialect operation captured by this target ++ /// construct. For an operation to be detected as captured, it must be ++ /// inside a (possibly multi-level) nest of OpenMP dialect operation's + /// regions where none of these levels contain other operations considered + /// not-allowed for these purposes (i.e. only terminator operations are + /// allowed from the OpenMP dialect, and other dialect's operations are + /// allowed as long as they don't have a memory write effect). ++ /// ++ /// If there are omp.loop_nest operations in the sequence of nested ++ /// operations, the top level one will be the one captured. + Operation *getInnermostCapturedOmpOp(); + + /// Tells whether this target region represents a single worksharing loop + /// wrapped by omp.teams omp.distribute and omp.parallel constructs. 
+ bool isTargetSPMDLoop(); -+ }]; - } - + }] # clausesExtraClassDeclaration; + + let assemblyFormat = clausesAssemblyFormat # [{ +- custom( +- $region, $in_reduction_vars, type($in_reduction_vars), +- $in_reduction_byref, $in_reduction_syms, $map_vars, type($map_vars), +- $private_vars, type($private_vars), $private_syms) attr-dict ++ custom( ++ $region, $host_eval_vars, type($host_eval_vars), $in_reduction_vars, ++ type($in_reduction_vars), $in_reduction_byref, $in_reduction_syms, ++ $map_vars, type($map_vars), $private_vars, type($private_vars), ++ $private_syms) attr-dict + }]; -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp llvm-project/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp ---- llvm-project.orig/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp 2024-06-12 10:43:15.264182696 -0500 -+++ llvm-project/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp 2024-06-12 10:44:09.359614154 -0500 -@@ -233,11 +233,11 @@ - }); - target.addDynamicallyLegalOp< - mlir::omp::AtomicUpdateOp, mlir::omp::CriticalOp, mlir::omp::TargetOp, -- mlir::omp::TargetDataOp, mlir::omp::LoopNestOp, -- mlir::omp::OrderedRegionOp, mlir::omp::ParallelOp, mlir::omp::WsloopOp, -- mlir::omp::SimdOp, mlir::omp::MasterOp, mlir::omp::SectionOp, -- mlir::omp::SectionsOp, mlir::omp::SingleOp, mlir::omp::TaskgroupOp, -- mlir::omp::TaskOp, mlir::omp::DeclareReductionOp, -+ mlir::omp::TeamsOp, mlir::omp::DistributeOp, mlir::omp::TargetDataOp, -+ mlir::omp::LoopNestOp, mlir::omp::OrderedRegionOp, mlir::omp::ParallelOp, -+ mlir::omp::WsloopOp, mlir::omp::SimdOp, mlir::omp::MasterOp, -+ mlir::omp::SectionOp, mlir::omp::SectionsOp, mlir::omp::SingleOp, -+ mlir::omp::TaskgroupOp, mlir::omp::TaskOp, mlir::omp::DeclareReductionOp, - mlir::omp::PrivateClauseOp>([&](Operation *op) { - return std::all_of(op->getRegions().begin(), op->getRegions().end(), - [&](Region ®ion) { -@@ -268,6 +268,7 @@ - RegionOpConversion, RegionOpConversion, - RegionOpConversion, RegionOpConversion, - RegionOpConversion, RegionOpConversion, -+ RegionOpConversion, RegionOpConversion, - RegionLessOpWithVarOperandsConversion, - RegionOpWithVarOperandsConversion, - RegionLessOpWithVarOperandsConversion, -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp llvm-project/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp ---- llvm-project.orig/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp 2024-06-12 10:43:15.292182402 -0500 -+++ llvm-project/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp 2024-06-12 10:44:09.359614154 -0500 -@@ -19,7 +19,6 @@ + let hasVerifier = 1; +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp llvm-project-aso/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +--- llvm-project-aso-orig/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp 2024-11-23 20:25:27.491272834 -0600 ++++ llvm-project-aso/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp 2024-11-23 20:39:47.200175294 -0600 +@@ -20,7 +20,6 @@ #include "mlir/IR/DialectImplementation.h" #include "mlir/IR/OpImplementation.h" #include "mlir/IR/OperationSupport.h" @@ -12725,46 +12415,93 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Dialect/OpenMP/IR/OpenM #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" -@@ -62,15 +61,6 @@ - LLVM::LLVMPointerType> { - Type getElementType(Type pointer) const { return Type(); } +@@ -502,6 +501,7 @@ + : vars(vars), types(types), byref(byref), syms(syms) {} }; -- --struct OpenMPDialectFoldInterface : public 
DialectFoldInterface { -- using DialectFoldInterface::DialectFoldInterface; -- -- bool shouldMaterializeInto(Region *region) const final { -- // Avoid folding constants across target regions -- return isa(region->getParentOp()); -- } --}; - } // namespace + struct AllRegionParseArgs { ++ std::optional hostEvalArgs; + std::optional inReductionArgs; + std::optional mapArgs; + std::optional privateArgs; +@@ -628,6 +628,11 @@ + AllRegionParseArgs args) { + llvm::SmallVector entryBlockArgs; + ++ if (failed(parseBlockArgClause(parser, entryBlockArgs, "host_eval", ++ args.hostEvalArgs))) ++ return parser.emitError(parser.getCurrentLocation()) ++ << "invalid `host_eval` format"; ++ + if (failed(parseBlockArgClause(parser, entryBlockArgs, "in_reduction", + args.inReductionArgs))) + return parser.emitError(parser.getCurrentLocation()) +@@ -666,8 +671,10 @@ + return parser.parseRegion(region, entryBlockArgs); + } + +-static ParseResult parseInReductionMapPrivateRegion( ++static ParseResult parseHostEvalInReductionMapPrivateRegion( + OpAsmParser &parser, Region ®ion, ++ SmallVectorImpl &hostEvalVars, ++ SmallVectorImpl &hostEvalTypes, + SmallVectorImpl &inReductionVars, + SmallVectorImpl &inReductionTypes, + DenseBoolArrayAttr &inReductionByref, ArrayAttr &inReductionSyms, +@@ -676,6 +683,7 @@ + llvm::SmallVectorImpl &privateVars, + llvm::SmallVectorImpl &privateTypes, ArrayAttr &privateSyms) { + AllRegionParseArgs args; ++ args.hostEvalArgs.emplace(hostEvalVars, hostEvalTypes); + args.inReductionArgs.emplace(inReductionVars, inReductionTypes, + inReductionByref, inReductionSyms); + args.mapArgs.emplace(mapVars, mapTypes); +@@ -789,6 +797,7 @@ + : vars(vars), types(types), byref(byref), syms(syms) {} + }; + struct AllRegionPrintArgs { ++ std::optional hostEvalArgs; + std::optional inReductionArgs; + std::optional mapArgs; + std::optional privateArgs; +@@ -867,6 +876,8 @@ + auto iface = llvm::cast(op); + MLIRContext *ctx = op->getContext(); + ++ printBlockArgClause(p, ctx, "host_eval", iface.getHostEvalBlockArgs(), ++ args.hostEvalArgs); + printBlockArgClause(p, ctx, "in_reduction", iface.getInReductionBlockArgs(), + args.inReductionArgs); + printBlockArgClause(p, ctx, "map_entries", iface.getMapBlockArgs(), +@@ -887,12 +898,14 @@ + p.printRegion(region, /*printEntryBlockArgs=*/false); + } + +-static void printInReductionMapPrivateRegion( +- OpAsmPrinter &p, Operation *op, Region ®ion, ValueRange inReductionVars, ++static void printHostEvalInReductionMapPrivateRegion( ++ OpAsmPrinter &p, Operation *op, Region ®ion, ValueRange hostEvalVars, ++ TypeRange hostEvalTypes, ValueRange inReductionVars, + TypeRange inReductionTypes, DenseBoolArrayAttr inReductionByref, + ArrayAttr inReductionSyms, ValueRange mapVars, TypeRange mapTypes, + ValueRange privateVars, TypeRange privateTypes, ArrayAttr privateSyms) { + AllRegionPrintArgs args; ++ args.hostEvalArgs.emplace(hostEvalVars, hostEvalTypes); + args.inReductionArgs.emplace(inReductionVars, inReductionTypes, + inReductionByref, inReductionSyms); + args.mapArgs.emplace(mapVars, mapTypes); +@@ -1652,20 +1665,170 @@ + // inReductionByref, inReductionSyms. 
+ TargetOp::build(builder, state, /*allocate_vars=*/{}, /*allocator_vars=*/{}, + makeArrayAttr(ctx, clauses.dependKinds), clauses.dependVars, +- clauses.device, clauses.hasDeviceAddrVars, clauses.ifExpr, ++ clauses.device, clauses.hasDeviceAddrVars, ++ clauses.hostEvalVars, clauses.ifExpr, + /*in_reduction_vars=*/{}, /*in_reduction_byref=*/nullptr, + /*in_reduction_syms=*/nullptr, clauses.isDevicePtrVars, + clauses.mapVars, clauses.nowait, clauses.privateVars, + makeArrayAttr(ctx, clauses.privateSyms), clauses.threadLimit); + } - void OpenMPDialect::initialize() { -@@ -87,7 +77,6 @@ - #include "mlir/Dialect/OpenMP/OpenMPOpsTypes.cpp.inc" - >(); - -- addInterface(); - MemRefType::attachInterface(*getContext()); - LLVM::LLVMPointerType::attachInterface( - *getContext()); -@@ -1417,19 +1406,170 @@ - // reductionDeclSymbols. - TargetOp::build( - builder, state, clauses.ifVar, clauses.deviceVar, clauses.threadLimitVar, -- makeArrayAttr(ctx, clauses.dependTypeAttrs), clauses.dependVars, -- clauses.nowaitAttr, clauses.isDevicePtrVars, clauses.hasDeviceAddrVars, -- clauses.mapVars, clauses.privateVars, -- makeArrayAttr(ctx, clauses.privatizers)); -+ /*trip_count=*/nullptr, makeArrayAttr(ctx, clauses.dependTypeAttrs), -+ clauses.dependVars, clauses.nowaitAttr, clauses.isDevicePtrVars, -+ clauses.hasDeviceAddrVars, clauses.mapVars, clauses.privateVars, -+ makeArrayAttr(ctx, clauses.privatizers), /*num_teams_lower=*/nullptr, -+ /*num_teams_upper=*/nullptr, /*teams_thread_limit=*/nullptr, -+ /*num_threads=*/nullptr); -+} -+ +/// Only allow OpenMP terminators and non-OpenMP ops that have known memory +/// effects, but don't include a memory write effect. +static bool siblingAllowedInCapture(Operation *op) { @@ -12806,87 +12543,88 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Dialect/OpenMP/IR/OpenM + return success(); +} + -+template -+static OpTy getSingleNestedOpOfType(Region ®ion) { -+ auto ops = region.getOps(); -+ return std::distance(ops.begin(), ops.end()) != 1 ? OpTy() : *ops.begin(); - } - LogicalResult TargetOp::verify() { + auto teamsOps = getOps(); + if (std::distance(teamsOps.begin(), teamsOps.end()) > 1) + return emitError("target containing multiple teams constructs"); + -+ if (!isTargetSPMDLoop()) { -+ if (getTripCount()) -+ return emitError("trip_count set on non-SPMD target region"); ++ // Check that host_eval values are only used in legal ways. 
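++ //
++ // Illustrative sketch only (not code from this patch; value names and the
++ // exact assembly syntax are assumptions): a host_eval block argument may
++ // only be used as num_teams/thread_limit of a nested omp.teams, as
++ // num_threads of omp.parallel when this is a target SPMD loop, or as
++ // omp.loop_nest bounds and steps in that same case. For example:
++ //
++ //   omp.target host_eval(%nt -> %arg0 : i32) {
++ //     omp.teams num_teams(to %arg0 : i32) {
++ //       ...
++ //     }
++ //   }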
++ bool isTargetSPMD = isTargetSPMDLoop(); ++ for (Value hostEvalArg : ++ cast(getOperation()).getHostEvalBlockArgs()) { ++ for (Operation *user : hostEvalArg.getUsers()) { ++ if (auto teamsOp = dyn_cast(user)) { ++ if (llvm::is_contained({teamsOp.getNumTeamsLower(), ++ teamsOp.getNumTeamsUpper(), ++ teamsOp.getThreadLimit()}, ++ hostEvalArg)) ++ continue; ++ ++ return emitOpError() << "host_eval argument only legal as 'num_teams' " ++ "and 'thread_limit' in 'omp.teams'"; ++ } ++ if (auto parallelOp = dyn_cast(user)) { ++ if (isTargetSPMD && hostEvalArg == parallelOp.getNumThreads()) ++ continue; + -+ if (getNumThreads() && !getSingleNestedOpOfType(getRegion())) -+ return emitError("num_threads set on non-SPMD or loop target region"); -+ } ++ return emitOpError() ++ << "host_eval argument only legal as 'num_threads' in " ++ "'omp.parallel' when representing target SPMD"; ++ } ++ if (auto loopNestOp = dyn_cast(user)) { ++ if (isTargetSPMD && ++ (llvm::is_contained(loopNestOp.getLoopLowerBounds(), hostEvalArg) || ++ llvm::is_contained(loopNestOp.getLoopUpperBounds(), hostEvalArg) || ++ llvm::is_contained(loopNestOp.getLoopSteps(), hostEvalArg))) ++ continue; ++ ++ return emitOpError() ++ << "host_eval argument only legal as loop bounds and steps in " ++ "'omp.loop_nest' when representing target SPMD"; ++ } + -+ if (teamsOps.empty()) { -+ if (getNumTeamsLower() || getNumTeamsUpper() || getTeamsThreadLimit()) -+ return emitError( -+ "num_teams and teams_thread_limit arguments only allowed if there is " -+ "an omp.teams child operation"); -+ } else { -+ if (failed(verifyNumTeamsClause(*this, getNumTeamsLower(), -+ getNumTeamsUpper()))) -+ return failure(); ++ return emitOpError() << "host_eval argument illegal use in '" ++ << user->getName() << "' operation"; ++ } + } + LogicalResult verifyDependVars = - verifyDependVarList(*this, getDepends(), getDependVars()); + verifyDependVarList(*this, getDependKinds(), getDependVars()); return failed(verifyDependVars) ? verifyDependVars - : verifyMapClause(*this, getMapOperands()); + : verifyMapClause(*this, getMapVars()); } +Operation *TargetOp::getInnermostCapturedOmpOp() { + Dialect *ompDialect = (*this)->getDialect(); + Operation *capturedOp = nullptr; -+ Region *capturedParentRegion = nullptr; + -+ walk([&](Operation *op) { ++ // Process in pre-order to check operations from outermost to innermost, ++ // ensuring we only enter the region of an operation if it meets the criteria ++ // for being captured. We stop the exploration of nested operations as soon as ++ // we process a region with no operation to be captured. ++ walk([&](Operation *op) { + if (op == *this) -+ return; ++ return WalkResult::advance(); + ++ // Ignore operations of other dialects or omp operations with no regions, ++ // because these will only be checked if they are siblings of an omp ++ // operation that can potentially be captured. + bool isOmpDialect = op->getDialect() == ompDialect; + bool hasRegions = op->getNumRegions() > 0; -+ -+ if (capturedOp) { -+ bool isImmediateParent = false; -+ for (Region ®ion : op->getRegions()) { -+ if (®ion == capturedParentRegion) { -+ isImmediateParent = true; -+ capturedParentRegion = op->getParentRegion(); -+ break; -+ } -+ } -+ -+ // Make sure the captured op is part of a (possibly multi-level) nest of -+ // OpenMP-only operations containing no unsupported siblings at any level. 
-+ if ((hasRegions && isOmpDialect != isImmediateParent) || -+ (!isImmediateParent && !siblingAllowedInCapture(op))) { -+ capturedOp = nullptr; -+ capturedParentRegion = nullptr; -+ } -+ } else { -+ // The first OpenMP dialect op containing a region found while visiting -+ // in post-order should be the innermost captured OpenMP operation. -+ if (isOmpDialect && hasRegions) { -+ capturedOp = op; -+ capturedParentRegion = op->getParentRegion(); -+ -+ // Don't capture this op if it has a not-allowed sibling. -+ for (Operation &sibling : op->getParentRegion()->getOps()) { -+ if (&sibling != op && !siblingAllowedInCapture(&sibling)) { -+ capturedOp = nullptr; -+ capturedParentRegion = nullptr; -+ } -+ } -+ } -+ } ++ if (!isOmpDialect || !hasRegions) ++ return WalkResult::skip(); ++ ++ // Don't capture this op if it has a not-allowed sibling, and stop recursing ++ // into nested operations. ++ for (Operation &sibling : op->getParentRegion()->getOps()) ++ if (&sibling != op && !siblingAllowedInCapture(&sibling)) ++ return WalkResult::interrupt(); ++ ++ // Don't continue capturing nested operations if we reach an omp.loop_nest. ++ // Otherwise, process the contents of this operation. ++ capturedOp = op; ++ return llvm::isa(op) ? WalkResult::interrupt() ++ : WalkResult::advance(); + }); + + return capturedOp; @@ -12897,24 +12635,23 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Dialect/OpenMP/IR/OpenM + if (!isa_and_present(capturedOp)) + return false; + -+ Operation *workshareOp = capturedOp->getParentOp(); -+ + // Accept optional SIMD leaf construct. ++ Operation *workshareOp = capturedOp->getParentOp(); + if (isa_and_present(workshareOp)) + workshareOp = workshareOp->getParentOp(); + + if (!isa_and_present(workshareOp)) + return false; + -+ Operation *parallelOp = workshareOp->getParentOp(); -+ if (!isa_and_present(parallelOp)) ++ Operation *distributeOp = workshareOp->getParentOp(); ++ if (!isa_and_present(distributeOp)) + return false; + -+ Operation *distributeOp = parallelOp->getParentOp(); -+ if (!isa_and_present(distributeOp)) ++ Operation *parallelOp = distributeOp->getParentOp(); ++ if (!isa_and_present(parallelOp)) + return false; + -+ Operation *teamsOp = distributeOp->getParentOp(); ++ Operation *teamsOp = parallelOp->getParentOp(); + if (!isa_and_present(teamsOp)) + return false; + @@ -12924,25 +12661,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Dialect/OpenMP/IR/OpenM //===----------------------------------------------------------------------===// // ParallelOp //===----------------------------------------------------------------------===// -@@ -1525,6 +1665,17 @@ - return emitError( - "expected equal sizes for allocate and allocator variables"); - -+ auto offloadModOp = -+ llvm::cast(*(*this)->getParentOfType()); -+ if (!offloadModOp.getIsTargetDevice()) { -+ auto targetOp = (*this)->getParentOfType(); -+ if (getNumThreadsVar() && targetOp && -+ (targetOp.isTargetSPMDLoop() || -+ getSingleNestedOpOfType(targetOp.getRegion()) == *this)) -+ return emitError("num_threads argument expected to be attached to parent " -+ "omp.target operation instead"); -+ } -+ - if (failed(verifyPrivateVarList(*this))) - return failure(); - -@@ -1558,23 +1709,23 @@ +@@ -1798,24 +1961,16 @@ // Check parent region // TODO If nested inside of a target region, also check that it does not // contain any statements, declarations or directives other than this @@ -12967,24 +12686,138 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Dialect/OpenMP/IR/OpenM - if 
(numTeamsLowerBound.getType() != numTeamsUpperBound.getType()) - return emitError( - "expected num_teams upper bound and lower bound to be the same type"); -+ auto offloadModOp = -+ llvm::cast(*(*this)->getParentOfType()); -+ if (targetOp && !offloadModOp.getIsTargetDevice()) { -+ if (getNumTeamsLower() || getNumTeamsUpper() || getThreadLimit()) -+ return emitError("num_teams and thread_limit arguments expected to be " -+ "attached to parent omp.target operation"); -+ } else { -+ if (failed(verifyNumTeamsClause(*this, getNumTeamsLower(), -+ getNumTeamsUpper()))) -+ return failure(); - } +- } ++ if (failed( ++ verifyNumTeamsClause(*this, getNumTeamsLower(), getNumTeamsUpper()))) ++ return failure(); // Check for allocate clause restrictions -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp llvm-project/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp ---- llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp 2024-06-12 10:43:15.324182065 -0500 -+++ llvm-project/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp 2024-06-12 10:44:09.359614154 -0500 -@@ -264,6 +264,143 @@ - llvm_unreachable("Unknown ClauseProcBindKind kind"); + if (getAllocateVars().size() != getAllocatorVars().size()) +@@ -2120,6 +2275,7 @@ + if (!isComposite()) + return emitError() + << "'omp.composite' attribute missing from composite wrapper"; ++ + // Check for the allowed leaf constructs that may appear in a composite + // construct directly after DISTRIBUTE. + if (isa(nested)) { +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp llvm-project-aso/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +--- llvm-project-aso-orig/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp 2024-11-23 20:25:27.499272806 -0600 ++++ llvm-project-aso/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp 2024-11-23 20:39:47.200175294 -0600 +@@ -32,6 +32,7 @@ + #include "llvm/IR/ReplaceConstant.h" + #include "llvm/Support/FileSystem.h" + #include "llvm/TargetParser/Triple.h" ++#include "llvm/Transforms/Utils/BasicBlockUtils.h" + #include "llvm/Transforms/Utils/ModuleUtils.h" + + #include +@@ -166,6 +167,10 @@ + if (op.getDevice()) + result = todo("device"); + }; ++ auto checkDistSchedule = [&todo](auto op, LogicalResult &result) { ++ if (op.getDistScheduleStatic() || op.getDistScheduleChunkSize()) ++ result = todo("dist_schedule"); ++ }; + auto checkHasDeviceAddr = [&todo](auto op, LogicalResult &result) { + if (!op.getHasDeviceAddrVars().empty()) + result = todo("has_device_addr"); +@@ -174,10 +179,6 @@ + if (op.getHint()) + op.emitWarning("hint clause discarded"); + }; +- auto checkIf = [&todo](auto op, LogicalResult &result) { +- if (op.getIfExpr()) +- result = todo("if"); +- }; + auto checkInReduction = [&todo](auto op, LogicalResult &result) { + if (!op.getInReductionVars().empty() || op.getInReductionByref() || + op.getInReductionSyms()) +@@ -224,10 +225,6 @@ + op.getReductionSyms()) + result = todo("reduction"); + }; +- auto checkThreadLimit = [&todo](auto op, LogicalResult &result) { +- if (op.getThreadLimit()) +- result = todo("thread_limit"); +- }; + auto checkTaskReduction = [&todo](auto op, LogicalResult &result) { + if (!op.getTaskReductionVars().empty() || op.getTaskReductionByref() || + op.getTaskReductionSyms()) +@@ -252,7 +249,6 @@ + .Case([&](omp::TeamsOp op) { + 
checkAllocate(op, result); + checkPrivate(op, result); +- checkReduction(op, result); + }) + .Case([&](omp::TaskOp op) { + checkAllocate(op, result); +@@ -287,11 +283,16 @@ + omp::AtomicCaptureOp>([&](auto op) { checkHint(op, result); }) + .Case( + [&](auto op) { checkDepend(op, result); }) ++ .Case([&](omp::DistributeOp op) { ++ checkAllocate(op, result); ++ checkDistSchedule(op, result); ++ checkOrder(op, result); ++ checkPrivate(op, result); ++ }) + .Case([&](omp::TargetOp op) { + checkAllocate(op, result); + checkDevice(op, result); + checkHasDeviceAddr(op, result); +- checkIf(op, result); + checkInReduction(op, result); + checkIsDevicePtr(op, result); + // Privatization clauses are supported, except on some situations, so we +@@ -311,7 +312,6 @@ + "structures in omp.target operation"); + } + } +- checkThreadLimit(op, result); + }) + .Default([](Operation &) { + // Assume all clauses for an operation can be translated unless they are +@@ -391,6 +391,8 @@ + Region ®ion, StringRef blockName, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation, + SmallVectorImpl *continuationBlockPHIs = nullptr) { ++ bool isLoopWrapper = isa(region.getParentOp()); ++ + llvm::BasicBlock *continuationBlock = + splitBB(builder, true, "omp.region.cont"); + llvm::BasicBlock *sourceBlock = builder.GetInsertBlock(); +@@ -412,7 +414,12 @@ + bool operandsProcessed = false; + unsigned numYields = 0; + for (Block &bb : region.getBlocks()) { +- if (omp::YieldOp yield = dyn_cast(bb.getTerminator())) { ++ // Prevent loop wrappers from crashing, as they have no terminators. ++ if (isLoopWrapper) ++ continue; ++ ++ if (omp::YieldOp yield = ++ dyn_cast_if_present(bb.getTerminator())) { + if (!operandsProcessed) { + for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) { + continuationBlockPHITypes.push_back( +@@ -468,6 +475,13 @@ + moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) + return llvm::make_error(); + ++ // Create branch here for loop wrappers to prevent their lack of a ++ // terminator from causing a crash below. ++ if (isLoopWrapper) { ++ builder.CreateBr(continuationBlock); ++ continue; ++ } ++ + // Special handling for `omp.yield` and `omp.terminator` (we may have more + // than one): they return the control to the parent OpenMP dialect operation + // so replace them with the branch to the continuation block. We handle this +@@ -569,6 +583,150 @@ + return success(); } +/// Populate a set of previously created llvm.alloca instructions that are only @@ -13055,8 +12888,8 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + // relying on captured variables. + SmallVector loopInfos; + SmallVector bodyInsertPoints; -+ LogicalResult bodyGenStatus = success(); -+ auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) { ++ auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, ++ llvm::Value *iv) -> llvm::Error { + // Make sure further conversions know about the induction variable. + moduleTranslation.mapValue( + loopOp.getRegion().front().getArgument(loopInfos.size()), iv); @@ -13067,7 +12900,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + bodyInsertPoints.push_back(ip); + + if (loopInfos.size() != loopOp.getNumLoops() - 1) -+ return; ++ return llvm::Error::success(); + + // Convert the body of the loop, adding lifetime markers to allocations that + // can be sunk into the new block. 
@@ -13076,14 +12909,16 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + unsigned size = alloca->getAllocatedType()->getPrimitiveSizeInBits() / 8; + builder.CreateLifetimeStart(alloca, builder.getInt64(size)); + } -+ llvm::BasicBlock *cont = -+ convertOmpOpRegions(loopOp.getRegion(), blockName, builder, -+ moduleTranslation, bodyGenStatus); -+ builder.SetInsertPoint(cont, cont->begin()); ++ llvm::Expected cont = convertOmpOpRegions( ++ loopOp.getRegion(), blockName, builder, moduleTranslation); ++ if (!cont) ++ return cont.takeError(); ++ builder.SetInsertPoint(*cont, (*cont)->begin()); + for (auto *alloca : allocasToSink) { + unsigned size = alloca->getAllocatedType()->getPrimitiveSizeInBits() / 8; + builder.CreateLifetimeEnd(alloca, builder.getInt64(size)); + } ++ return llvm::Error::success(); + }; + + // Delegate actual loop construction to the OpenMP IRBuilder. @@ -13093,10 +12928,10 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + // cases. + for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) { + llvm::Value *lowerBound = -+ moduleTranslation.lookupValue(loopOp.getLowerBound()[i]); ++ moduleTranslation.lookupValue(loopOp.getLoopLowerBounds()[i]); + llvm::Value *upperBound = -+ moduleTranslation.lookupValue(loopOp.getUpperBound()[i]); -+ llvm::Value *step = moduleTranslation.lookupValue(loopOp.getStep()[i]); ++ moduleTranslation.lookupValue(loopOp.getLoopUpperBounds()[i]); ++ llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[i]); + + // Make sure loop trip count are emitted in the preheader of the outermost + // loop at the latest so that they are all available for the new collapsed @@ -13104,15 +12939,20 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc; + llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP; + if (i != 0) { -+ loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back()); ++ loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(), ++ ompLoc.DL); + computeIP = loopInfos.front()->getPreheaderIP(); + } -+ loopInfos.push_back(ompBuilder->createCanonicalLoop( -+ loc, bodyGen, lowerBound, upperBound, step, -+ /*IsSigned=*/true, loopOp.getInclusive(), computeIP)); + -+ if (failed(bodyGenStatus)) ++ llvm::Expected loopResult = ++ ompBuilder->createCanonicalLoop( ++ loc, bodyGen, lowerBound, upperBound, step, ++ /*IsSigned=*/true, /*InclusiveStop=*/true, computeIP); ++ ++ if (failed(handleError(loopResult, *loopOp))) + return std::nullopt; ++ ++ loopInfos.push_back(*loopResult); + } + + // Collapse loops. Store the insertion point because LoopInfos may get @@ -13124,96 +12964,188 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + return std::make_tuple(ompLoc, afterIP, loopInfo); +} + - /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder. + /// Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder. 
static LogicalResult - convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, -@@ -676,9 +813,11 @@ - if (Value ifExprVar = op.getIfExpr()) - ifExpr = moduleTranslation.lookupValue(ifExprVar); + convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder, +@@ -992,19 +1150,37 @@ + // variable allocated in the inlined region) + llvm::Value *var = builder.CreateAlloca( + moduleTranslation.convertType(reductionDecls[i].getType())); +- deferredStores.emplace_back(phis[0], var); ++ var->setName("private_redvar"); + +- privateReductionVariables[i] = var; +- moduleTranslation.mapValue(reductionArgs[i], phis[0]); +- reductionVariableMap.try_emplace(loop.getReductionVars()[i], phis[0]); ++ llvm::Type *ptrTy = llvm::PointerType::getUnqual(builder.getContext()); ++ llvm::Value *castVar = ++ builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy); ++ // TODO: I (Sergio) just guessed casting phis[0] like it's done for var is ++ // what's supposed to happen with this code coming from a merge from main, ++ // but I don't actually know. Someone more familiar with it needs to check ++ // this. ++ llvm::Value *castPhi = ++ builder.CreatePointerBitCastOrAddrSpaceCast(phis[0], ptrTy); ++ ++ deferredStores.emplace_back(castPhi, castVar); ++ ++ privateReductionVariables[i] = castVar; ++ moduleTranslation.mapValue(reductionArgs[i], castPhi); ++ reductionVariableMap.try_emplace(loop.getReductionVars()[i], castPhi); + } else { + assert(allocRegion.empty() && + "allocaction is implicit for by-val reduction"); + llvm::Value *var = builder.CreateAlloca( + moduleTranslation.convertType(reductionDecls[i].getType())); +- moduleTranslation.mapValue(reductionArgs[i], var); +- privateReductionVariables[i] = var; +- reductionVariableMap.try_emplace(loop.getReductionVars()[i], var); ++ var->setName("private_redvar"); ++ ++ llvm::Type *ptrTy = llvm::PointerType::getUnqual(builder.getContext()); ++ llvm::Value *castVar = ++ builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy); ++ ++ moduleTranslation.mapValue(reductionArgs[i], castVar); ++ privateReductionVariables[i] = castVar; ++ reductionVariableMap.try_emplace(loop.getReductionVars()[i], castVar); + } + } -+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); - llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); -- builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTeams( -+ builder.restoreIP(ompBuilder->createTeams( - ompLoc, bodyCB, numTeamsLower, numTeamsUpper, threadLimit, ifExpr)); +@@ -1117,18 +1293,20 @@ + LLVM::ModuleTranslation &moduleTranslation, + llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, + SmallVectorImpl &reductionDecls, +- ArrayRef privateReductionVariables, ArrayRef isByRef) { ++ ArrayRef privateReductionVariables, ArrayRef isByRef, ++ bool isNowait = false, bool isTeamsReduction = false) { + // Process the reductions if required. + if (op.getNumReductionVars() == 0) + return success(); + ++ SmallVector owningReductionGens; ++ SmallVector owningAtomicReductionGens; ++ SmallVector reductionInfos; + - return bodyGenStatus; + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + + // Create the reduction generators. We need to own them here because + // ReductionInfo only accepts references to the generators. 
+- SmallVector owningReductionGens; +- SmallVector owningAtomicReductionGens; +- SmallVector reductionInfos; + collectReductionInfo(op, builder, moduleTranslation, reductionDecls, + owningReductionGens, owningAtomicReductionGens, + privateReductionVariables, reductionInfos); +@@ -1140,7 +1318,7 @@ + builder.SetInsertPoint(tempTerminator); + llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint = + ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos, +- isByRef, op.getNowait()); ++ isByRef, isNowait, isTeamsReduction); + + if (failed(handleError(contInsertPoint, *op))) + return failure(); +@@ -1166,7 +1344,6 @@ + return inlineOmpRegionCleanup(reductionRegions, privateReductionVariables, + moduleTranslation, builder, + "omp.reduction.cleanup"); +- return success(); } -@@ -779,9 +918,15 @@ - continue; - llvm::Value *var = builder.CreateAlloca( - moduleTranslation.convertType(reductionDecls[i].getType())); -- moduleTranslation.mapValue(args[i], var); -- privateReductionVariables.push_back(var); -- reductionVariableMap.try_emplace(loop.getReductionVars()[i], var); -+ -+ var->setName("private_redvar"); -+ llvm::Type *ptrTy = llvm::PointerType::getUnqual(builder.getContext()); -+ llvm::Value *castVar = -+ builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy); -+ -+ moduleTranslation.mapValue(args[i], castVar); -+ privateReductionVariables.push_back(castVar); -+ reductionVariableMap.try_emplace(loop.getReductionVars()[i], castVar); - } + static ArrayRef getIsByRef(std::optional> attr) { +@@ -1429,9 +1606,9 @@ + builder.restoreIP(*afterIP); + + // Process the reductions if required. +- return createReductionsAndCleanup(sectionsOp, builder, moduleTranslation, +- allocaIP, reductionDecls, +- privateReductionVariables, isByRef); ++ return createReductionsAndCleanup( ++ sectionsOp, builder, moduleTranslation, allocaIP, reductionDecls, ++ privateReductionVariables, isByRef, sectionsOp.getNowait()); } -@@ -815,7 +960,6 @@ - const SmallVector &privateReductionVariables, - SmallVector &reductionInfos) { - unsigned numReductions = loop.getNumReductionVars(); -- - for (unsigned i = 0; i < numReductions; ++i) { - owningReductionGens.push_back( - makeReductionGen(reductionDecls[i], builder, moduleTranslation)); -@@ -825,10 +969,12 @@ - - // Collect the reduction information. - reductionInfos.reserve(numReductions); -+ - for (unsigned i = 0; i < numReductions; ++i) { - llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr; - if (owningAtomicReductionGens[i]) - atomicGen = owningAtomicReductionGens[i]; -+ - llvm::Value *variable = - moduleTranslation.lookupValue(loop.getReductionVars()[i]); - reductionInfos.push_back( -@@ -883,9 +1029,15 @@ + /// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder. 
+@@ -1485,6 +1662,33 @@ + if (failed(checkImplementationStatus(*op))) + return failure(); + ++ llvm::ArrayRef isByRef = getIsByRef(op.getReductionByref()); ++ assert(isByRef.size() == op.getNumReductionVars()); ++ ++ SmallVector reductionDecls; ++ collectReductionDecls(op, reductionDecls); ++ llvm::OpenMPIRBuilder::InsertPointTy allocaIP = ++ findAllocaInsertPoint(builder, moduleTranslation); ++ ++ SmallVector privateReductionVariables( ++ op.getNumReductionVars()); ++ DenseMap reductionVariableMap; ++ ++ MutableArrayRef reductionArgs = ++ llvm::cast(*op).getReductionBlockArgs(); ++ ++ if (failed(allocAndInitializeReductionVars( ++ op, reductionArgs, builder, moduleTranslation, allocaIP, ++ reductionDecls, privateReductionVariables, reductionVariableMap, ++ isByRef))) ++ return failure(); ++ ++ // Store the mapping between reduction variables and their private copies on ++ // ModuleTranslation stack. It can be then recovered when translating ++ // omp.reduce operations in a separate call. ++ LLVM::ModuleTranslation::SaveStack mappingGuard( ++ moduleTranslation, reductionVariableMap); ++ + auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) { + LLVM::ModuleTranslation::SaveStack frame( + moduleTranslation, allocaIP); +@@ -1519,6 +1723,13 @@ + return failure(); + + builder.restoreIP(*afterIP); ++ ++ // Process the reductions if required. ++ return createReductionsAndCleanup( ++ op, builder, moduleTranslation, allocaIP, reductionDecls, ++ privateReductionVariables, isByRef, ++ /*isNoWait*/ false, /*isTeamsReduction*/ true); ++ + return success(); } - /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder. --static LogicalResult --convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, -- LLVM::ModuleTranslation &moduleTranslation) { -+static LogicalResult convertOmpWsloop( -+ Operation &opInst, llvm::IRBuilderBase &builder, -+ LLVM::ModuleTranslation &moduleTranslation, -+ llvm::OpenMPIRBuilder::InsertPointTy redAllocaIP, -+ SmallVector &owningReductionGens, -+ SmallVector &owningAtomicReductionGens, -+ SmallVector &reductionInfos) { +@@ -1713,6 +1924,11 @@ + static LogicalResult + convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { ++ llvm::OpenMPIRBuilder::InsertPointTy redAllocaIP = ++ findAllocaInsertPoint(builder, moduleTranslation); ++ + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + // FIXME: This ignores any other nested wrappers (e.g. omp.simd). 
auto wsloopOp = cast(opInst); - auto loopOp = cast(wsloopOp.getWrappedLoop()); - -@@ -908,12 +1060,10 @@ + if (failed(checkImplementationStatus(opInst))) + return failure(); +@@ -1738,8 +1954,6 @@ SmallVector reductionDecls; collectReductionDecls(wsloopOp, reductionDecls); - llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); - SmallVector privateReductionVariables; - DenseMap reductionVariableMap; -- allocByValReductionVars(wsloopOp, builder, moduleTranslation, allocaIP, -+ allocByValReductionVars(wsloopOp, builder, moduleTranslation, redAllocaIP, - reductionDecls, privateReductionVariables, - reductionVariableMap, isByRef); + SmallVector privateReductionVariables( + wsloopOp.getNumReductionVars()); +@@ -1749,7 +1963,7 @@ + cast(opInst).getReductionBlockArgs(); -@@ -966,6 +1116,9 @@ + if (failed(allocAndInitializeReductionVars( +- wsloopOp, reductionArgs, builder, moduleTranslation, allocaIP, ++ wsloopOp, reductionArgs, builder, moduleTranslation, redAllocaIP, + reductionDecls, privateReductionVariables, reductionVariableMap, + isByRef))) + return failure(); +@@ -1770,6 +1984,9 @@ // Set up the source location value for OpenMP runtime. llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); @@ -13221,42 +13153,49 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + getSinkableAllocas(moduleTranslation, loopOp.getRegion(), allocasToSink); + // Generator of the canonical loop body. - // TODO: support error propagation in OpenMPIRBuilder and use it instead of - // relying on captured variables. -@@ -985,10 +1138,21 @@ + SmallVector loopInfos; + SmallVector bodyInsertPoints; +@@ -1787,11 +2004,27 @@ if (loopInfos.size() != loopOp.getNumLoops() - 1) - return; + return llvm::Error::success(); - // Convert the body of the loop. + // Convert the body of the loop, adding lifetime markers to allocations that + // can be sunk into the new block. builder.restoreIP(ip); -- convertOmpOpRegions(loopOp.getRegion(), "omp.wsloop.region", builder, -- moduleTranslation, bodyGenStatus); +- return convertOmpOpRegions(loopOp.getRegion(), "omp.wsloop.region", builder, +- moduleTranslation) +- .takeError(); + for (auto *alloca : allocasToSink) { + unsigned size = alloca->getAllocatedType()->getPrimitiveSizeInBits() / 8; + builder.CreateLifetimeStart(alloca, builder.getInt64(size)); + } -+ llvm::BasicBlock *cont = ++ ++ llvm::Expected cont = + convertOmpOpRegions(loopOp.getRegion(), "omp.wsloop.region", builder, -+ moduleTranslation, bodyGenStatus); -+ builder.SetInsertPoint(cont, cont->begin()); ++ moduleTranslation); ++ if (!cont) ++ return cont.takeError(); ++ ++ builder.SetInsertPoint(*cont, (*cont)->begin()); ++ + for (auto *alloca : allocasToSink) { + unsigned size = alloca->getAllocatedType()->getPrimitiveSizeInBits() / 8; + builder.CreateLifetimeEnd(alloca, builder.getInt64(size)); + } ++ return llvm::Error::success(); }; // Delegate actual loop construction to the OpenMP IRBuilder. -@@ -996,7 +1160,6 @@ +@@ -1799,7 +2032,6 @@ // loop, i.e. it has a positive step, uses signed integer semantics. // Reconsider this code when the nested loop operation clearly supports more // cases. 
- llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) { llvm::Value *lowerBound = - moduleTranslation.lookupValue(loopOp.getLowerBound()[i]); -@@ -1027,7 +1190,8 @@ + moduleTranslation.lookupValue(loopOp.getLoopLowerBounds()[i]); +@@ -1834,19 +2066,30 @@ llvm::CanonicalLoopInfo *loopInfo = ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {}); @@ -13265,11 +13204,16 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + findAllocaInsertPoint(builder, moduleTranslation); // TODO: Handle doacross loops when the ordered clause has a parameter. - bool isOrdered = wsloopOp.getOrderedVal().has_value(); -@@ -1035,11 +1199,22 @@ - wsloopOp.getScheduleModifier(); - bool isSimd = wsloopOp.getSimdModifier(); - + bool isOrdered = wsloopOp.getOrdered().has_value(); + std::optional scheduleMod = wsloopOp.getScheduleMod(); + bool isSimd = wsloopOp.getScheduleSimd(); + +- llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP = +- ompBuilder->applyWorkshareLoop( +- ompLoc.DL, loopInfo, allocaIP, !wsloopOp.getNowait(), +- convertToScheduleKind(schedule), chunk, isSimd, +- scheduleMod == omp::ScheduleModifier::monotonic, +- scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered); + bool distributeCodeGen = opInst.getParentOfType(); + bool parallelCodeGen = opInst.getParentOfType(); + llvm::omp::WorksharingLoopType workshareLoopType; @@ -13280,77 +13224,39 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + } else { + workshareLoopType = llvm::omp::WorksharingLoopType::ForStaticLoop; + } - ompBuilder->applyWorkshareLoop( - ompLoc.DL, loopInfo, allocaIP, !wsloopOp.getNowait(), - convertToScheduleKind(schedule), chunk, isSimd, - scheduleModifier == omp::ScheduleModifier::monotonic, -- scheduleModifier == omp::ScheduleModifier::nonmonotonic, isOrdered); -+ scheduleModifier == omp::ScheduleModifier::nonmonotonic, isOrdered, ++ llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP = ompBuilder->applyWorkshareLoop( ++ ompLoc.DL, loopInfo, allocaIP, !wsloopOp.getNowait(), ++ convertToScheduleKind(schedule), chunk, isSimd, ++ scheduleMod == omp::ScheduleModifier::monotonic, ++ scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered, + workshareLoopType); - // Continue building IR after the loop. Note that the LoopInfo returned by - // `collapseLoops` points inside the outermost loop and is intended for -@@ -1053,21 +1228,20 @@ - - // Create the reduction generators. We need to own them here because - // ReductionInfo only accepts references to the generators. -- SmallVector owningReductionGens; -- SmallVector owningAtomicReductionGens; -- SmallVector reductionInfos; - collectReductionInfo(wsloopOp, builder, moduleTranslation, reductionDecls, - owningReductionGens, owningAtomicReductionGens, - privateReductionVariables, reductionInfos); -- - // The call to createReductions below expects the block to have a - // terminator. Create an unreachable instruction to serve as terminator - // and remove it later. 
- llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable(); - builder.SetInsertPoint(tempTerminator); -+ - llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint = - ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos, -- isByRef, wsloopOp.getNowait()); -+ isByRef, wsloopOp.getNowait(), -+ /*IsTeamsReduction=*/false, -+ /*HasDistribute=*/distributeCodeGen); - if (!contInsertPoint.getBlock()) - return wsloopOp->emitOpError() << "failed to convert reductions"; - auto nextInsertionPoint = -@@ -1086,6 +1260,20 @@ - "omp.reduction.cleanup"); + if (failed(handleError(wsloopIP, opInst))) + return failure(); +@@ -1858,9 +2101,10 @@ + builder.restoreIP(afterIP); + + // Process the reductions if required. +- return createReductionsAndCleanup(wsloopOp, builder, moduleTranslation, +- allocaIP, reductionDecls, +- privateReductionVariables, isByRef); ++ return createReductionsAndCleanup( ++ wsloopOp, builder, moduleTranslation, allocaIP, reductionDecls, ++ privateReductionVariables, isByRef, wsloopOp.getNowait(), ++ /*isTeamsReduction=*/false); } -+static LogicalResult -+convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, -+ LLVM::ModuleTranslation &moduleTranslation) { -+ llvm::OpenMPIRBuilder::InsertPointTy redAllocaIP = -+ findAllocaInsertPoint(builder, moduleTranslation); -+ SmallVector owningReductionGens; -+ SmallVector owningAtomicReductionGens; -+ SmallVector reductionInfos; -+ -+ return convertOmpWsloop(opInst, builder, moduleTranslation, redAllocaIP, -+ owningReductionGens, owningAtomicReductionGens, -+ reductionInfos); -+} -+ - /// A RAII class that on construction replaces the region arguments of the - /// parallel op (which correspond to private variables) with the actual private - /// variables they correspond to. This prepares the parallel op so that it -@@ -1224,10 +1412,10 @@ - // Generate reductions from info - llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable(); - builder.SetInsertPoint(tempTerminator); -- - llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint = + /// Converts the OpenMP parallel operation to LLVM IR. +@@ -2072,7 +2316,7 @@ + + llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint = ompBuilder->createReductions(builder.saveIP(), allocaIP, - reductionInfos, isByRef, false); -+ reductionInfos, isByRef, false, false, -+ false); - if (!contInsertPoint.getBlock()) { - bodyGenStatus = opInst->emitOpError() << "failed to convert reductions"; - return; -@@ -1409,71 +1597,16 @@ ++ reductionInfos, isByRef, false, false); + if (!contInsertPoint) + return contInsertPoint.takeError(); + +@@ -2171,77 +2415,19 @@ static LogicalResult convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { @@ -13358,15 +13264,16 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O auto simdOp = cast(opInst); auto loopOp = cast(simdOp.getWrappedLoop()); + if (failed(checkImplementationStatus(opInst))) + return failure(); + - llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); - - // Generator of the canonical loop body. -- // TODO: support error propagation in OpenMPIRBuilder and use it instead of -- // relying on captured variables. 
- SmallVector loopInfos; - SmallVector bodyInsertPoints; -- LogicalResult bodyGenStatus = success(); -- auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) { +- auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, +- llvm::Value *iv) -> llvm::Error { - // Make sure further conversions know about the induction variable. - moduleTranslation.mapValue( - loopOp.getRegion().front().getArgument(loopInfos.size()), iv); @@ -13377,12 +13284,13 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O - bodyInsertPoints.push_back(ip); - - if (loopInfos.size() != loopOp.getNumLoops() - 1) -- return; +- return llvm::Error::success(); - - // Convert the body of the loop. - builder.restoreIP(ip); -- convertOmpOpRegions(loopOp.getRegion(), "omp.simd.region", builder, -- moduleTranslation, bodyGenStatus); +- return convertOmpOpRegions(loopOp.getRegion(), "omp.simd.region", builder, +- moduleTranslation) +- .takeError(); - }; - - // Delegate actual loop construction to the OpenMP IRBuilder. @@ -13393,10 +13301,10 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O - llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); - for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) { - llvm::Value *lowerBound = -- moduleTranslation.lookupValue(loopOp.getLowerBound()[i]); +- moduleTranslation.lookupValue(loopOp.getLoopLowerBounds()[i]); - llvm::Value *upperBound = -- moduleTranslation.lookupValue(loopOp.getUpperBound()[i]); -- llvm::Value *step = moduleTranslation.lookupValue(loopOp.getStep()[i]); +- moduleTranslation.lookupValue(loopOp.getLoopUpperBounds()[i]); +- llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[i]); - - // Make sure loop trip count are emitted in the preheader of the outermost - // loop at the latest so that they are all available for the new collapsed @@ -13408,18 +13316,22 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O - ompLoc.DL); - computeIP = loopInfos.front()->getPreheaderIP(); - } -- loopInfos.push_back(ompBuilder->createCanonicalLoop( -- loc, bodyGen, lowerBound, upperBound, step, -- /*IsSigned=*/true, /*Inclusive=*/true, computeIP)); +- +- llvm::Expected loopResult = +- ompBuilder->createCanonicalLoop( +- loc, bodyGen, lowerBound, upperBound, step, +- /*IsSigned=*/true, /*InclusiveStop=*/true, computeIP); +- +- if (failed(handleError(loopResult, *loopOp))) +- return failure(); +- +- loopInfos.push_back(*loopResult); +- } + auto loopNestConversionResult = convertLoopNestHelper( + *loopOp, builder, moduleTranslation, "omp.simd.region"); + if (!loopNestConversionResult) + return failure(); -- if (failed(bodyGenStatus)) -- return failure(); -- } -- - // Collapse loops. - llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP(); - llvm::CanonicalLoopInfo *loopInfo = @@ -13428,7 +13340,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O llvm::ConstantInt *simdlen = nullptr; if (std::optional simdlenVar = simdOp.getSimdlen()) -@@ -1962,7 +2095,8 @@ +@@ -2751,7 +2937,8 @@ // bytes from the extent (ub - lb) * sizeInBytes. NOTE: This may need // some adjustment for members with more complex types. 
return builder.CreateMul(elementCount, @@ -13438,60 +13350,80 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O } } -@@ -2725,8 +2859,12 @@ - argIndex++; - } - -- bodyGenStatus = inlineConvertOmpRegions(region, "omp.data.region", -- builder, moduleTranslation); +@@ -3175,7 +3362,7 @@ + + combinedInfo.Types.emplace_back(mapFlag); + combinedInfo.DevicePointers.emplace_back( +- mapData.DevicePointers[memberDataIdx]); ++ llvm::OpenMPIRBuilder::DeviceInfoTy::None); + combinedInfo.Names.emplace_back( + LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder)); + uint64_t basePointerIndex = +@@ -3557,9 +3744,14 @@ + return info.DevicePtrInfoMap[basePointer].second; + }); + +- if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder, +- moduleTranslation))) +- return llvm::make_error(); + SmallVector phis; -+ llvm::BasicBlock *continuationBlock = ++ llvm::Expected continuationBlock = + convertOmpOpRegions(region, "omp.data.region", builder, -+ moduleTranslation, bodyGenStatus, &phis); -+ builder.SetInsertPoint(continuationBlock, -+ continuationBlock->getFirstInsertionPt()); ++ moduleTranslation, &phis); ++ if (!continuationBlock) ++ return continuationBlock.takeError(); ++ builder.SetInsertPoint(*continuationBlock, ++ (*continuationBlock)->getFirstInsertionPt()); } break; case BodyGenTy::DupNoPriv: -@@ -2735,8 +2873,12 @@ +@@ -3568,6 +3760,7 @@ // If device info is available then region has already been generated if (info.DevicePtrInfoMap.empty()) { builder.restoreIP(codeGenIP); -- bodyGenStatus = inlineConvertOmpRegions(region, "omp.data.region", -- builder, moduleTranslation); ++ + // For device pass, if use_device_ptr(addr) mappings were present, + // we need to link them here before codegen. + if (ompBuilder->Config.IsTargetDevice.value_or(false)) { +@@ -3579,9 +3772,14 @@ + useDevicePtrVars, mapData); + } + +- if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder, +- moduleTranslation))) +- return llvm::make_error(); + SmallVector phis; -+ llvm::BasicBlock *continuationBlock = ++ llvm::Expected continuationBlock = + convertOmpOpRegions(region, "omp.data.region", builder, -+ moduleTranslation, bodyGenStatus, &phis); -+ builder.SetInsertPoint(continuationBlock, -+ continuationBlock->getFirstInsertionPt()); ++ moduleTranslation, &phis); ++ if (!continuationBlock) ++ return continuationBlock.takeError(); ++ builder.SetInsertPoint(*continuationBlock, ++ (*continuationBlock)->getFirstInsertionPt()); } break; } -@@ -2759,6 +2901,90 @@ - return bodyGenStatus; +@@ -3608,6 +3806,64 @@ + return success(); } -+static LogicalResult convertOmpDistribute( -+ Operation &opInst, llvm::IRBuilderBase &builder, -+ LLVM::ModuleTranslation &moduleTranslation, -+ llvm::OpenMPIRBuilder::InsertPointTy *redAllocaIP, -+ SmallVector &reductionInfos) { ++static LogicalResult ++convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder, ++ LLVM::ModuleTranslation &moduleTranslation) { + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); -+ // FIXME: This ignores any other nested wrappers (e.g. omp.parallel + -+ // omp.wsloop, omp.simd). ++ // FIXME: This ignores any other nested wrappers (e.g. omp.wsloop, omp.simd). 
+ auto distributeOp = cast(opInst); ++ if (failed(checkImplementationStatus(opInst))) ++ return failure(); ++ + auto loopOp = cast(distributeOp.getWrappedLoop()); + + SmallVector loopWrappers; + loopOp.gatherWrappers(loopWrappers); + + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; -+ // TODO: support error propagation in OpenMPIRBuilder and use it instead of -+ // relying on captured variables. -+ LogicalResult bodyGenStatus = success(); -+ -+ auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { ++ auto bodyGenCB = [&](InsertPointTy allocaIP, ++ InsertPointTy codeGenIP) -> llvm::Error { + // Save the alloca insertion point on ModuleTranslation stack for use in + // nested regions. + LLVM::ModuleTranslation::SaveStack frame( @@ -13499,79 +13431,44 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + + // DistributeOp has only one region associated with it. + builder.restoreIP(codeGenIP); -+ *redAllocaIP = allocaIP; + + if (loopWrappers.size() == 1) { + // Convert a standalone DISTRIBUTE construct. + auto loopNestConversionResult = convertLoopNestHelper( + *loopOp, builder, moduleTranslation, "omp.distribute.region"); + if (!loopNestConversionResult) -+ return; // TODO: Signal error to abort translation. ++ return llvm::make_error(); + + builder.restoreIP(std::get(*loopNestConversionResult)); + } else { + // Convert a DISTRIBUTE leaf as part of a composite construct. + mlir::Region ® = distributeOp.getRegion(); -+ auto *regionBlock = -+ convertOmpOpRegions(reg, "omp.distribute.region", builder, -+ moduleTranslation, bodyGenStatus); -+ -+ builder.SetInsertPoint(regionBlock->getTerminator()); -+ } -+ -+ // FIXME(JAN): We need to know if we are inside a distribute and -+ // if there is an inner wsloop reduction, in that case we need to -+ // generate the teams reduction bits to combine everything correctly. We -+ // will try to collect the reduction info from the inner wsloop and use -+ // that instead of the reduction clause that could have been on the -+ // omp.parallel -+ auto IP = builder.saveIP(); -+ if (ompBuilder->Config.isGPU()) { -+ // TODO: Consider passing the isByref array together with reductionInfos -+ // if it needs to match nested parallel-do or simd. -+ SmallVector isByref(reductionInfos.size(), true); -+ llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint = -+ ompBuilder->createReductions(IP, allocaIP, reductionInfos, isByref, -+ /*IsNoWait=*/false, -+ /*IsTeamsReduction=*/true); -+ builder.restoreIP(contInsertPoint); ++ llvm::Expected regionBlock = convertOmpOpRegions( ++ reg, "omp.distribute.region", builder, moduleTranslation); ++ if (!regionBlock) ++ return regionBlock.takeError(); ++ builder.SetInsertPoint((*regionBlock)->getTerminator()); + } ++ return llvm::Error::success(); + }; + + llvm::OpenMPIRBuilder::InsertPointTy allocaIP = + findAllocaInsertPoint(builder, moduleTranslation); + llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); -+ builder.restoreIP(ompBuilder->createDistribute(ompLoc, allocaIP, bodyGenCB)); ++ llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = ++ ompBuilder->createDistribute(ompLoc, allocaIP, bodyGenCB); + -+ return success(); -+} ++ if (!afterIP) ++ return opInst.emitError(llvm::toString(afterIP.takeError())); ++ builder.restoreIP(*afterIP); + -+static LogicalResult -+convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder, -+ LLVM::ModuleTranslation &moduleTranslation) { -+ // No reductions are present so we just create dummy variables. 
-+ llvm::OpenMPIRBuilder::InsertPointTy dummyRedAllocaIP; -+ SmallVector dummyReductionInfos; -+ return convertOmpDistribute(opInst, builder, moduleTranslation, -+ &dummyRedAllocaIP, dummyReductionInfos); ++ return success(); +} + /// Lowers the FlagsAttr which is applied to the module on the device /// pass when offloading, this attribute contains OpenMP RTL globals that can /// be passed as flags to the frontend, otherwise they are set to default -@@ -2831,11 +3057,6 @@ - return false; - } - -- if (targetOp.getThreadLimit()) { -- opInst.emitError("Thread limit clause not yet supported"); -- return false; -- } -- - if (targetOp.getNowait()) { - opInst.emitError("Nowait clause not yet supported"); - return false; -@@ -2950,7 +3171,7 @@ +@@ -3783,7 +4039,7 @@ ompBuilder.M.getDataLayout().getProgramAddressSpace(); // Create the alloca for the argument the current point. @@ -13579,8 +13476,8 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + llvm::Value *v = builder.CreateAlloca(arg.getType(), allocaAS, nullptr); if (allocaAS != defaultAS && arg.getType()->isPointerTy()) - v = builder.CreatePointerBitCastOrAddrSpaceCast( -@@ -2980,6 +3201,181 @@ + v = builder.CreateAddrSpaceCast(v, builder.getPtrTy(defaultAS)); +@@ -3814,6 +4070,301 @@ return builder.saveIP(); } @@ -13598,6 +13495,95 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + return op->getParentOfType(); +} + ++static uint64_t getTypeByteSize(mlir::Type type, DataLayout dl) { ++ uint64_t sizeInBits = dl.getTypeSizeInBits(type); ++ uint64_t sizeInBytes = sizeInBits / 8; ++ return sizeInBytes; ++} ++ ++template ++static uint64_t getReductionDataSize(OpTy &op) { ++ if (op.getNumReductionVars() > 0) { ++ assert(op.getNumReductionVars() && ++ "Only 1 reduction variable currently supported"); ++ mlir::Type reductionVarTy = op.getReductionVars()[0].getType(); ++ Operation *opp = op.getOperation(); ++ DataLayout dl = DataLayout(opp->getParentOfType()); ++ return getTypeByteSize(reductionVarTy, dl); ++ } ++ return 0; ++} ++ ++static uint64_t getTeamsReductionDataSize(mlir::omp::TeamsOp &teamsOp) { ++ return getReductionDataSize(teamsOp); ++} ++ ++/// Follow uses of `host_eval`-defined block arguments of the given `omp.target` ++/// operation and populate output variables with their corresponding host value ++/// (i.e. operand evaluated outside of the target region), based on their uses ++/// inside of the target region. ++/// ++/// Loop bounds and steps are only optionally populated, if output vectors are ++/// provided. 
++static void ++extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads, ++ Value &numTeamsLower, Value &numTeamsUpper, ++ Value &threadLimit, ++ llvm::SmallVectorImpl *lowerBounds = nullptr, ++ llvm::SmallVectorImpl *upperBounds = nullptr, ++ llvm::SmallVectorImpl *steps = nullptr) { ++ auto blockArgIface = llvm::cast(*targetOp); ++ for (auto item : llvm::zip_equal(targetOp.getHostEvalVars(), ++ blockArgIface.getHostEvalBlockArgs())) { ++ Value hostEvalVar = std::get<0>(item), blockArg = std::get<1>(item); ++ ++ for (Operation *user : blockArg.getUsers()) { ++ llvm::TypeSwitch(user) ++ .Case([&](omp::TeamsOp teamsOp) { ++ if (teamsOp.getNumTeamsLower() == blockArg) ++ numTeamsLower = hostEvalVar; ++ else if (teamsOp.getNumTeamsUpper() == blockArg) ++ numTeamsUpper = hostEvalVar; ++ else if (teamsOp.getThreadLimit() == blockArg) ++ threadLimit = hostEvalVar; ++ else ++ llvm_unreachable("unsupported host_eval use"); ++ }) ++ .Case([&](omp::ParallelOp parallelOp) { ++ if (parallelOp.getNumThreads() == blockArg) ++ numThreads = hostEvalVar; ++ else ++ llvm_unreachable("unsupported host_eval use"); ++ }) ++ .Case([&](omp::LoopNestOp loopOp) { ++ auto processBounds = ++ [&](OperandRange opBounds, ++ llvm::SmallVectorImpl *outBounds) -> bool { ++ bool found = false; ++ for (auto [i, lb] : llvm::enumerate(opBounds)) { ++ if (lb == blockArg) { ++ found = true; ++ if (outBounds) ++ (*outBounds)[i] = hostEvalVar; ++ } ++ } ++ return found; ++ }; ++ bool found = ++ processBounds(loopOp.getLoopLowerBounds(), lowerBounds); ++ found = processBounds(loopOp.getLoopUpperBounds(), upperBounds) || ++ found; ++ found = processBounds(loopOp.getLoopSteps(), steps) || found; ++ if (!found) ++ llvm_unreachable("unsupported host_eval use"); ++ }) ++ .Default([](Operation *) { ++ llvm_unreachable("unsupported host_eval use"); ++ }); ++ } ++ } ++} ++ +/// Populate default `MinTeams`, `MaxTeams` and `MaxThreads` to their default +/// values as stated by the corresponding clauses, if constant. +/// @@ -13608,29 +13594,52 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + omp::TargetOp targetOp, + llvm::OpenMPIRBuilder::TargetKernelDefaultBounds &bounds, + bool isTargetDevice, bool isGPU) { -+ // TODO Handle constant IF clauses -+ Operation *innermostCapturedOmpOp = targetOp.getInnermostCapturedOmpOp(); ++ // TODO: Handle constant 'if' clauses. ++ Operation *capturedOp = targetOp.getInnermostCapturedOmpOp(); ++ ++ // Extract values for host-evaluated clauses. ++ Value numThreads, numTeamsLower, numTeamsUpper, threadLimit; ++ if (!isTargetDevice) { ++ extractHostEvalClauses(targetOp, numThreads, numTeamsLower, numTeamsUpper, ++ threadLimit); ++ } else { ++ // In the target device, values for these clauses are not passed as ++ // host_eval, but instead evaluated prior to entry to the region. This ++ // ensures values are mapped and available inside of the target region. ++ if (auto teamsOp = castOrGetParentOfType(capturedOp)) { ++ numTeamsLower = teamsOp.getNumTeamsLower(); ++ numTeamsUpper = teamsOp.getNumTeamsUpper(); ++ threadLimit = teamsOp.getThreadLimit(); ++ } ++ ++ if (auto parallelOp = castOrGetParentOfType(capturedOp)) ++ numThreads = parallelOp.getNumThreads(); ++ } ++ ++ auto extractConstInteger = [](Value value) -> std::optional { ++ if (auto constOp = ++ dyn_cast_if_present(value.getDefiningOp())) ++ if (auto constAttr = dyn_cast(constOp.getValue())) ++ return constAttr.getInt(); ++ ++ return std::nullopt; ++ }; + + // Handle clauses impacting the number of teams. 
++
+  int32_t minTeamsVal = 1, maxTeamsVal = -1;
-+  if (auto teamsOp =
-+          castOrGetParentOfType<omp::TeamsOp>(innermostCapturedOmpOp)) {
-+    // TODO Use teamsOp.getNumTeamsLower() to initialize `minTeamsVal`. For now,
-+    // just match clang and set min and max to the same value.
-+    Value numTeamsClause = isTargetDevice ? teamsOp.getNumTeamsUpper()
-+                                          : targetOp.getNumTeamsUpper();
-+    if (numTeamsClause) {
-+      if (auto constOp = dyn_cast_if_present<LLVM::ConstantOp>(
-+              numTeamsClause.getDefiningOp())) {
-+        if (auto constAttr = constOp.getValue().dyn_cast<IntegerAttr>())
-+          minTeamsVal = maxTeamsVal = constAttr.getInt();
-+      }
++  if (castOrGetParentOfType<omp::TeamsOp>(capturedOp)) {
++    // TODO: Use `hostNumTeamsLower` to initialize `minTeamsVal`. For now, match
++    // clang and set min and max to the same value.
++    if (numTeamsUpper) {
++      if (auto val = extractConstInteger(numTeamsUpper))
++        minTeamsVal = maxTeamsVal = *val;
+    } else {
+      minTeamsVal = maxTeamsVal = 0;
+    }
-+  } else if (castOrGetParentOfType<omp::ParallelOp>(innermostCapturedOmpOp,
++  } else if (castOrGetParentOfType<omp::ParallelOp>(capturedOp,
+                                     /*immediateParent=*/true) ||
-+             castOrGetParentOfType<omp::SimdOp>(innermostCapturedOmpOp,
++             castOrGetParentOfType<omp::SimdOp>(capturedOp,
+                                     /*immediateParent=*/true)) {
+    minTeamsVal = maxTeamsVal = 1;
+  } else {
@@ -13638,46 +13647,33 @@
+  }
+
+  // Handle clauses impacting the number of threads.
-+  int32_t targetThreadLimitVal = -1;
-+  int32_t teamsThreadLimitVal = -1;
-+  int32_t maxThreadsVal = -1;
+
-+  auto setMaxValueFromClause = [](Value clauseValue, int32_t &result) {
-+    if (clauseValue) {
-+      if (auto constOp = dyn_cast_if_present<LLVM::ConstantOp>(
-+              clauseValue.getDefiningOp())) {
-+        if (auto constAttr = constOp.getValue().dyn_cast<IntegerAttr>())
-+          result = constAttr.getInt();
-+      }
-+      // Found an applicable clause, so it's not undefined. Mark as unknown
-+      // because it's not constant.
-+      if (result < 0)
-+        result = 0;
-+    }
++  auto setMaxValueFromClause = [&extractConstInteger](Value clauseValue,
++                                                      int32_t &result) {
++    if (!clauseValue)
++      return;
++
++    if (auto val = extractConstInteger(clauseValue))
++      result = *val;
++
++    // Found an applicable clause, so it's not undefined. Mark as unknown
++    // because it's not constant.
++    if (result < 0)
++      result = 0;
+  };
+
+  // Extract THREAD_LIMIT clause from TARGET and TEAMS directives.
++  int32_t targetThreadLimitVal = -1, teamsThreadLimitVal = -1;
+  setMaxValueFromClause(targetOp.getThreadLimit(), targetThreadLimitVal);
-+
-+  if (auto teamsOp =
-+          castOrGetParentOfType<omp::TeamsOp>(innermostCapturedOmpOp)) {
-+    Value threadLimitClause = isTargetDevice ? teamsOp.getThreadLimit()
-+                                             : targetOp.getTeamsThreadLimit();
-+    setMaxValueFromClause(threadLimitClause, teamsThreadLimitVal);
-+  }
++  setMaxValueFromClause(threadLimit, teamsThreadLimitVal);
+
+  // Extract MAX_THREADS clause from PARALLEL or set to 1 if it's SIMD.
-+  if (innermostCapturedOmpOp) {
-+    if (auto parallelOp =
-+            castOrGetParentOfType<omp::ParallelOp>(innermostCapturedOmpOp)) {
-+      Value numThreadsClause = isTargetDevice ? parallelOp.getNumThreadsVar()
-+                                              : targetOp.getNumThreads();
-+      setMaxValueFromClause(numThreadsClause, maxThreadsVal);
-+    } else if (castOrGetParentOfType<omp::SimdOp>(innermostCapturedOmpOp,
-+                                                  /*immediateParent=*/true)) {
-+      maxThreadsVal = 1;
-+    }
-+  }
++  int32_t maxThreadsVal = -1;
++  if (castOrGetParentOfType<omp::ParallelOp>(capturedOp))
++    setMaxValueFromClause(numThreads, maxThreadsVal);
++  else if (castOrGetParentOfType<omp::SimdOp>(capturedOp,
++                                              /*immediateParent=*/true))
++    maxThreadsVal = 1;
+
+  // For max values, < 0 means unset, == 0 means set but unknown.
+  // Select the minimum value between MAX_THREADS and THREAD_LIMIT clauses
+  // that were set.
+  int32_t combinedMaxThreadsVal = targetThreadLimitVal;
+  if (combinedMaxThreadsVal < 0 ||
+      (teamsThreadLimitVal >= 0 && teamsThreadLimitVal < combinedMaxThreadsVal))
+    combinedMaxThreadsVal = teamsThreadLimitVal;
+
+  if (combinedMaxThreadsVal < 0 ||
+      (maxThreadsVal >= 0 && maxThreadsVal < combinedMaxThreadsVal))
+    combinedMaxThreadsVal = maxThreadsVal;
+
@@ -13693,41 +13689,17 @@
+  // Calculate reduction data size, limited to single reduction variable
+  // for now.
+  int32_t reductionDataSize = 0;
-+  if (isGPU && innermostCapturedOmpOp) {
-+    if (auto loopNestOp =
-+            mlir::dyn_cast<mlir::omp::LoopNestOp>(innermostCapturedOmpOp)) {
-+      // FIXME: This treats 'DO SIMD' as if it was a 'DO' construct. Reductions
-+      // on other constructs apart from 'DO' aren't considered either.
-+      mlir::omp::WsloopOp wsloopOp = nullptr;
-+      SmallVector<mlir::omp::LoopWrapperInterface> wrappers;
-+      loopNestOp.gatherWrappers(wrappers);
-+      for (auto wrapper : wrappers) {
-+        wsloopOp = mlir::dyn_cast<mlir::omp::WsloopOp>(*wrapper);
-+        if (wsloopOp)
-+          break;
-+      }
-+      if (wsloopOp) {
-+        if (wsloopOp.getNumReductionVars() > 0) {
-+          assert(wsloopOp.getNumReductionVars() &&
-+                 "Only 1 reduction variable currently supported");
-+          mlir::Value reductionVar = wsloopOp.getReductionVars()[0];
-+          DataLayout dl =
-+              DataLayout(innermostCapturedOmpOp->getParentOfType<ModuleOp>());
-+
-+          mlir::Type reductionVarTy = reductionVar.getType();
-+          uint64_t sizeInBits = dl.getTypeSizeInBits(reductionVarTy);
-+          uint64_t sizeInBytes = sizeInBits / 8;
-+          reductionDataSize = sizeInBytes;
-+        }
-+      }
++  if (isGPU && capturedOp) {
++    if (auto teamsOp = castOrGetParentOfType<omp::TeamsOp>(capturedOp)) {
++      reductionDataSize = getTeamsReductionDataSize(teamsOp);
+    }
+  }
+
+  // Update kernel bounds structure for the `OpenMPIRBuilder` to use.
+  bounds.MinTeams = minTeamsVal;
-+  bounds.MaxTeams = maxTeamsVal;
++  bounds.MaxTeams.push_back(maxTeamsVal);
+  bounds.MinThreads = 1;
-+  bounds.MaxThreads = combinedMaxThreadsVal;
++  bounds.MaxThreads.push_back(combinedMaxThreadsVal);
+  bounds.ReductionDataSize = reductionDataSize;
+  if (bounds.ReductionDataSize != 0)
+    bounds.ReductionBufferLength = 1024;
@@ -13740,77 +13712,108 @@
+/// only provide correct results if it's called after the body of \c targetOp
+/// has been fully generated.
+static void initTargetRuntimeBounds(
-+    LLVM::ModuleTranslation &moduleTranslation, omp::TargetOp targetOp,
++    llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation,
++    omp::TargetOp targetOp,
+    llvm::OpenMPIRBuilder::TargetKernelRuntimeBounds &bounds) {
++  omp::LoopNestOp loopOp = castOrGetParentOfType<omp::LoopNestOp>(
++      targetOp.getInnermostCapturedOmpOp());
++  unsigned numLoops = loopOp ? loopOp.getNumLoops() : 0;
++
++  Value numThreads, numTeamsLower, numTeamsUpper, teamsThreadLimit;
++  llvm::SmallVector<Value> lowerBounds(numLoops), upperBounds(numLoops),
++      steps(numLoops);
++  extractHostEvalClauses(targetOp, numThreads, numTeamsLower, numTeamsUpper,
++                         teamsThreadLimit, &lowerBounds, &upperBounds, &steps);
++
+  // TODO Handle IF clauses.
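++  // Illustrative example (mirrors the `omp_target_host_eval` test added to
++  // ops.mlir further below): for a host-evaluated SPMD kernel such as
++  //
++  //   !$omp target teams distribute parallel do
++  //   do i = lb, ub, step
++  //     ...
++  //   end do
++  //
++  // the teams/thread counts and the loop bounds arrive here as host_eval
++  // values, and the trip count is computed on the host for the kernel launch.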
-+  if (Value numTeamsLower = targetOp.getNumTeamsLower())
++  llvm::Value *&llvmTargetThreadLimit =
++      bounds.TargetThreadLimit.emplace_back(nullptr);
++  if (Value targetThreadLimit = targetOp.getThreadLimit())
++    llvmTargetThreadLimit = moduleTranslation.lookupValue(targetThreadLimit);
++
++  if (numTeamsLower)
+    bounds.MinTeams = moduleTranslation.lookupValue(numTeamsLower);
+
-+  if (Value numTeamsUpper = targetOp.getNumTeamsUpper())
-+    bounds.MaxTeams = moduleTranslation.lookupValue(numTeamsUpper);
++  llvm::Value *&llvmMaxTeams = bounds.MaxTeams.emplace_back(nullptr);
++  if (numTeamsUpper)
++    llvmMaxTeams = moduleTranslation.lookupValue(numTeamsUpper);
+
-+  if (Value teamsThreadLimit = targetOp.getTeamsThreadLimit())
-+    bounds.TeamsThreadLimit = moduleTranslation.lookupValue(teamsThreadLimit);
++  llvm::Value *&llvmTeamsThreadLimit =
++      bounds.TeamsThreadLimit.emplace_back(nullptr);
++  if (teamsThreadLimit)
++    llvmTeamsThreadLimit = moduleTranslation.lookupValue(teamsThreadLimit);
+
-+  if (Value numThreads = targetOp.getNumThreads())
++  if (numThreads)
+    bounds.MaxThreads = moduleTranslation.lookupValue(numThreads);
+
-+  if (Value tripCount = targetOp.getTripCount())
-+    bounds.LoopTripCount = moduleTranslation.lookupValue(tripCount);
++  if (targetOp.isTargetSPMDLoop()) {
++    llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
++    bounds.LoopTripCount = nullptr;
++
++    // To calculate the trip count, we multiply together the trip counts of
++    // every collapsed canonical loop. We don't need to create the loop nests
++    // here, since we're only interested in the trip count.
++    for (auto [loopLower, loopUpper, loopStep] :
++         llvm::zip_equal(lowerBounds, upperBounds, steps)) {
++      llvm::Value *lowerBound = moduleTranslation.lookupValue(loopLower);
++      llvm::Value *upperBound = moduleTranslation.lookupValue(loopUpper);
++      llvm::Value *step = moduleTranslation.lookupValue(loopStep);
++
++      llvm::OpenMPIRBuilder::LocationDescription loc(builder);
++      llvm::Value *tripCount = ompBuilder->calculateCanonicalLoopTripCount(
++          loc, lowerBound, upperBound, step, /*IsSigned=*/true,
++          loopOp.getLoopInclusive());
++
++      if (!bounds.LoopTripCount) {
++        bounds.LoopTripCount = tripCount;
++        continue;
++      }
++
++      // TODO: Enable UndefinedSanitizer to diagnose an overflow here.
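++      // The multiplication below is tagged 'nuw' on the assumption that the
++      // combined trip count of the collapsed loop nest fits in the result
++      // type; the TODO above covers diagnosing the overflowing case.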
++ bounds.LoopTripCount = builder.CreateMul(bounds.LoopTripCount, tripCount, ++ {}, /*HasNUW=*/true); ++ } ++ } +} + static LogicalResult convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { -@@ -2987,32 +3383,20 @@ - if (!targetOpSupported(opInst)) - return failure(); +@@ -3823,12 +4374,14 @@ -+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); -+ bool isTargetDevice = ompBuilder->Config.isTargetDevice(); + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + bool isTargetDevice = ompBuilder->Config.isTargetDevice(); + bool isGPU = ompBuilder->Config.isGPU(); ++ auto parentFn = opInst.getParentOfType(); - auto targetOp = cast(opInst); ++ auto blockIface = cast(opInst); auto &targetRegion = targetOp.getRegion(); DataLayout dl = DataLayout(opInst.getParentOfType()); - SmallVector mapOperands = targetOp.getMapOperands(); - -+ llvm::OpenMPIRBuilder::TargetKernelRuntimeBounds runtimeBounds; - LogicalResult bodyGenStatus = success(); - using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; - auto bodyCB = [&](InsertPointTy allocaIP, - InsertPointTy codeGenIP) -> InsertPointTy { -- // Forward target-cpu and target-features function attributes from the -- // original function to the new outlined function. -- llvm::Function *llvmParentFn = -- moduleTranslation.lookupFunction(parentFn.getName()); -- llvm::Function *llvmOutlinedFn = codeGenIP.getBlock()->getParent(); -- assert(llvmParentFn && llvmOutlinedFn && -- "Both parent and outlined functions must exist at this point"); -- -- if (auto attr = llvmParentFn->getFnAttribute("target-cpu"); -- attr.isStringAttribute()) -- llvmOutlinedFn->addFnAttr(attr); -- -- if (auto attr = llvmParentFn->getFnAttribute("target-features"); -- attr.isStringAttribute()) -- llvmOutlinedFn->addFnAttr(attr); -- - builder.restoreIP(codeGenIP); - unsigned argIndex = 0; - for (auto &mapOp : mapOperands) { -@@ -3027,6 +3411,10 @@ - llvm::BasicBlock *exitBlock = convertOmpOpRegions( - targetRegion, "omp.target", builder, moduleTranslation, bodyGenStatus); - builder.SetInsertPoint(exitBlock); -+ -+ if (!isTargetDevice) -+ initTargetRuntimeBounds(moduleTranslation, targetOp, runtimeBounds); + SmallVector mapVars = targetOp.getMapVars(); +- ArrayRef mapBlockArgs = +- cast(opInst).getMapBlockArgs(); ++ ArrayRef mapBlockArgs = blockIface.getMapBlockArgs(); + llvm::Function *llvmOutlinedFn = nullptr; + + // TODO: It can also be false if a compile-time constant `false` IF clause is +@@ -3871,7 +4424,7 @@ + OperandRange privateVars = targetOp.getPrivateVars(); + std::optional privateSyms = targetOp.getPrivateSyms(); + MutableArrayRef privateBlockArgs = +- cast(opInst).getPrivateBlockArgs(); ++ blockIface.getPrivateBlockArgs(); + + for (auto [privVar, privatizerNameAttr, privBlockArg] : + llvm::zip_equal(privateVars, *privateSyms, privateBlockArgs)) { +@@ -3905,6 +4458,7 @@ + return exitBlock.takeError(); + + builder.SetInsertPoint(*exitBlock); + return builder.saveIP(); }; -@@ -3038,9 +3426,6 @@ +@@ -3916,9 +4470,6 @@ if (!getTargetEntryUniqueInfo(entryInfo, targetOp, parentName)) return failure(); @@ -13820,49 +13823,57 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O llvm::OpenMPIRBuilder::InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation); -@@ -3060,14 +3445,12 @@ - auto argAccessorCB = [&](llvm::Argument &arg, llvm::Value *input, - llvm::Value *&retVal, InsertPointTy allocaIP, - InsertPointTy codeGenIP) { -- 
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); -- - // We just return the unaltered argument for the host function - // for now, some alterations may be required in the future to - // keep host fallback functions working identically to the device - // version (e.g. pass ByCopy values should be treated as such on - // host and device, currently not always the case) -- if (!ompBuilder->Config.isTargetDevice()) { -+ if (!isTargetDevice) { - retVal = cast(&arg); - return codeGenIP; - } -@@ -3089,13 +3472,21 @@ - kernelInput.push_back(mapData.OriginalValue[i]); - } +@@ -3954,6 +4505,29 @@ + }; -- builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTarget( -- ompLoc, allocaIP, builder.saveIP(), entryInfo, defaultValTeams, -- defaultValThreads, kernelInput, genMapInfoCB, bodyCB, argAccessorCB)); + llvm::SmallVector kernelInput; + llvm::OpenMPIRBuilder::TargetKernelDefaultBounds defaultBounds; + initTargetDefaultBounds(targetOp, defaultBounds, isTargetDevice, isGPU); + -+ if (Value targetThreadLimit = targetOp.getThreadLimit()) -+ runtimeBounds.TargetThreadLimit = -+ moduleTranslation.lookupValue(targetThreadLimit); -+ -+ builder.restoreIP(ompBuilder->createTarget( -+ ompLoc, targetOp.isTargetSPMDLoop(), allocaIP, builder.saveIP(), -+ entryInfo, defaultBounds, runtimeBounds, kernelInput, genMapInfoCB, -+ bodyCB, argAccessorCB)); - - // Remap access operations to declare target reference pointers for the - // device, essentially generating extra loadop's as necessary -- if (moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice()) -+ if (isTargetDevice) - handleDeclareTargetMapVar(mapData, moduleTranslation, builder); - - return bodyGenStatus; -@@ -3191,24 +3582,45 @@ ++ // Collect host-evaluated values needed to properly launch the kernel from the ++ // host. ++ llvm::OpenMPIRBuilder::TargetKernelRuntimeBounds runtimeBounds; ++ if (!isTargetDevice) ++ initTargetRuntimeBounds(builder, moduleTranslation, targetOp, ++ runtimeBounds); ++ ++ // Pass host-evaluated values as parameters to the kernel / host fallback, ++ // except if they are constants. In any case, map the MLIR block argument to ++ // the corresponding LLVM values. 
++ SmallVector hostEvalVars = targetOp.getHostEvalVars(); ++ ArrayRef hostEvalBlockArgs = blockIface.getHostEvalBlockArgs(); ++ for (auto [arg, var] : llvm::zip_equal(hostEvalBlockArgs, hostEvalVars)) { ++ llvm::Value *value = moduleTranslation.lookupValue(var); ++ moduleTranslation.mapValue(arg, value); ++ ++ if (!llvm::isa(value)) ++ kernelInput.push_back(value); ++ } ++ + for (size_t i = 0; i < mapVars.size(); ++i) { + // declare target arguments are not passed to kernels as arguments + // TODO: We currently do not handle cases where a member is explicitly +@@ -3969,11 +4543,16 @@ + buildDependData(targetOp.getDependKinds(), targetOp.getDependVars(), + moduleTranslation, dds); + ++ llvm::Value *ifCond = nullptr; ++ if (Value targetIfCond = targetOp.getIfExpr()) ++ ifCond = moduleTranslation.lookupValue(targetIfCond); ++ + llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = + moduleTranslation.getOpenMPBuilder()->createTarget( +- ompLoc, isOffloadEntry, allocaIP, builder.saveIP(), entryInfo, +- defaultValTeams, defaultValThreads, kernelInput, genMapInfoCB, bodyCB, +- argAccessorCB, dds, targetOp.getNowait()); ++ ompLoc, targetOp.isTargetSPMDLoop(), isOffloadEntry, ifCond, allocaIP, ++ builder.saveIP(), entryInfo, defaultBounds, runtimeBounds, ++ kernelInput, genMapInfoCB, bodyCB, argAccessorCB, dds, ++ targetOp.getNowait()); + + if (failed(handleError(afterIP, opInst))) + return failure(); +@@ -4079,25 +4658,6 @@ return success(); } @@ -13872,10 +13883,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O - // Assumes no reverse offloading - if (op->getParentOfType()) - return true; -+/////////////////////////////////////////////////////////////////////////////// -+// CompoundConstructs lowering forward declarations -+class OpenMPDialectLLVMIRTranslationInterface; - +- - if (auto parentFn = op->getParentOfType()) - if (auto declareTargetIface = - llvm::dyn_cast( @@ -13884,47 +13892,14 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O - declareTargetIface.getDeclareTargetDeviceType() != - mlir::omp::DeclareTargetDeviceType::host) - return true; -+using ConvertFunctionTy = std::function( -+ Operation *, llvm::IRBuilderBase &, LLVM::ModuleTranslation &)>; - +- - return false; -} -+class ConversionDispatchList { -+private: -+ llvm::SmallVector functions; -+ -+public: -+ std::pair -+ convertOperation(Operation *op, llvm::IRBuilderBase &builder, -+ LLVM::ModuleTranslation &moduleTranslation) { -+ for (auto riter = functions.rbegin(); riter != functions.rend(); ++riter) { -+ bool match = false; -+ LogicalResult result = failure(); -+ std::tie(match, result) = (*riter)(op, builder, moduleTranslation); -+ if (match) -+ return {true, result}; -+ } -+ return {false, failure()}; -+ } -+ -+ void pushConversionFunction(ConvertFunctionTy function) { -+ functions.push_back(function); -+ } -+ void popConversionFunction() { functions.pop_back(); } -+}; -+ -+static LogicalResult convertOmpDistributeParallelWsloop( -+ Operation *op, omp::DistributeOp distribute, omp::ParallelOp parallel, -+ omp::WsloopOp wsloop, llvm::IRBuilderBase &builder, -+ LLVM::ModuleTranslation &moduleTranslation, -+ ConversionDispatchList &dispatchList); -+ -+/////////////////////////////////////////////////////////////////////////////// -+// Dispatch functions - +- /// Given an OpenMP MLIR operation, create the corresponding LLVM IR /// (including OpenMP runtime calls). 
-@@ -3313,6 +3725,9 @@ + static LogicalResult +@@ -4214,6 +4774,9 @@ .Case([&](omp::TargetOp) { return convertOmpTarget(*op, builder, moduleTranslation); }) @@ -13934,7 +13909,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O .Case( [&](auto op) { // No-op, should be handled by relevant owning operations e.g. -@@ -3326,9 +3741,101 @@ +@@ -4226,6 +4789,38 @@ }); } @@ -13957,55 +13932,6 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + return false; +} + -+// Returns true if the given block has a single instruction. -+static bool singleInstrBlock(Block &block) { -+ bool result = (block.getOperations().size() == 2); -+ if (!result) { -+ llvm::errs() << "Num ops: " << block.getOperations().size() << "\n"; -+ } -+ return result; -+} -+ -+// Returns the operation if it only contains one instruction otherwise -+// return nullptr. -+template -+Operation *getContainedInstr(OpType op) { -+ Region ®ion = op.getRegion(); -+ if (!region.hasOneBlock()) { -+ llvm::errs() << "Region has multiple blocks\n"; -+ return nullptr; -+ } -+ Block &block = region.front(); -+ if (!singleInstrBlock(block)) { -+ return nullptr; -+ } -+ return &(block.getOperations().front()); -+} -+ -+// Returns the operation if it only contains one instruction otherwise -+// return nullptr. -+template -+Block &getContainedBlock(OpType op) { -+ Region ®ion = op.getRegion(); -+ return region.front(); -+} -+ -+template -+bool matchOpScanNest(Block &block, FirstOpType &firstOp, -+ RestOpTypes &...restOps) { -+ for (Operation &op : block) { -+ if ((firstOp = mlir::dyn_cast(op))) { -+ if constexpr (sizeof...(RestOpTypes) == 0) { -+ return true; -+ } else { -+ Block &innerBlock = getContainedBlock(firstOp); -+ return matchOpScanNest(innerBlock, restOps...); -+ } -+ } -+ } -+ return false; -+} -+ +template +bool matchOpNest(Operation *op, FirstOpType &firstOp, RestOpTypes &...restOps) { + if ((firstOp = mlir::dyn_cast(op))) { @@ -14021,87 +13947,18 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O + static LogicalResult convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder, -- LLVM::ModuleTranslation &moduleTranslation) { -+ LLVM::ModuleTranslation &moduleTranslation, -+ ConversionDispatchList &dispatchList) { -+ omp::DistributeOp distribute; -+ omp::ParallelOp parallel; -+ omp::WsloopOp wsloop; -+ // Match composite constructs -+ if (matchOpNest(op, distribute, parallel, wsloop)) { -+ return convertOmpDistributeParallelWsloop(op, distribute, parallel, wsloop, -+ builder, moduleTranslation, -+ dispatchList); -+ } -+ - return convertHostOrTargetOperation(op, builder, moduleTranslation); - } - -@@ -3356,12 +3863,62 @@ + LLVM::ModuleTranslation &moduleTranslation) { +@@ -4256,7 +4851,8 @@ return failure(interrupted); } -namespace { +/////////////////////////////////////////////////////////////////////////////// -+// CompoundConstructs lowering implementations -+ -+// Implementation converting a nest of operations in a single function. This -+// just overrides the parallel and wsloop dispatches but does the normal -+// lowering for now. 
-+static LogicalResult convertOmpDistributeParallelWsloop( -+ Operation *op, omp::DistributeOp distribute, omp::ParallelOp parallel, -+ omp::WsloopOp wsloop, llvm::IRBuilderBase &builder, -+ LLVM::ModuleTranslation &moduleTranslation, -+ ConversionDispatchList &dispatchList) { -+ -+ // Reduction related data structures -+ SmallVector owningReductionGens; -+ SmallVector owningAtomicReductionGens; -+ SmallVector reductionInfos; -+ llvm::OpenMPIRBuilder::InsertPointTy redAllocaIP; -+ -+ // Convert wsloop alternative implementation -+ ConvertFunctionTy convertWsloop = -+ [&redAllocaIP, &owningReductionGens, &owningAtomicReductionGens, -+ &reductionInfos](Operation *op, llvm::IRBuilderBase &builder, -+ LLVM::ModuleTranslation &moduleTranslation) { -+ if (!isa(op)) { -+ return std::make_pair(false, failure()); -+ } -+ -+ LogicalResult result = convertOmpWsloop( -+ *op, builder, moduleTranslation, redAllocaIP, owningReductionGens, -+ owningAtomicReductionGens, reductionInfos); -+ return std::make_pair(true, result); -+ }; -+ -+ // Push the new alternative functions -+ dispatchList.pushConversionFunction(convertWsloop); -+ -+ // Lower the current distribute operation -+ LogicalResult result = convertOmpDistribute(*op, builder, moduleTranslation, -+ &redAllocaIP, reductionInfos); -+ -+ // Pop the alternative functions -+ dispatchList.popConversionFunction(); -+ -+ return result; -+} -+ -+/////////////////////////////////////////////////////////////////////////////// +// OpenMPDialectLLVMIRTranslationInterface /// Implementation of the dialect interface that converts operations belonging /// to the OpenMP dialect to LLVM IR. - class OpenMPDialectLLVMIRTranslationInterface - : public LLVMTranslationDialectInterface { -+private: -+ mutable ConversionDispatchList dispatchList; -+ - public: - using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface; - -@@ -3371,16 +3928,14 @@ +@@ -4271,16 +4867,14 @@ convertOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) const final; @@ -14120,369 +13977,193 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/lib/Target/LLVMIR/Dialect/O LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation( Operation *op, ArrayRef instructions, NamedAttribute attribute, -@@ -3475,13 +4030,21 @@ - Operation *op, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation) const { - -+ // Check to see if there is a lowering that overrides the default lowering -+ // if not use the default dispatch. 
-+ bool match = false; -+ LogicalResult result = success(); -+ std::tie(match, result) = -+ dispatchList.convertOperation(op, builder, moduleTranslation); -+ if (match) -+ return result; -+ - llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); - if (ompBuilder->Config.isTargetDevice()) { -- if (isTargetDeviceOp(op)) { -- return convertTargetDeviceOp(op, builder, moduleTranslation); -- } else { -- return convertTargetOpsInNest(op, builder, moduleTranslation); -- } -+ if (isTargetDeviceOp(op)) -+ return convertTargetDeviceOp(op, builder, moduleTranslation, -+ dispatchList); -+ return convertTargetOpsInNest(op, builder, moduleTranslation); - } - return convertHostOrTargetOperation(op, builder, moduleTranslation); - } -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir llvm-project/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir ---- llvm-project.orig/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir 2024-06-12 10:43:15.340181898 -0500 -+++ llvm-project/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir 2024-06-12 10:44:09.359614154 -0500 -@@ -174,6 +174,7 @@ - ^bb3: - omp.yield - } -+ omp.terminator - } - return - } -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Dialect/OpenMP/invalid.mlir llvm-project/mlir/test/Dialect/OpenMP/invalid.mlir ---- llvm-project.orig/mlir/test/Dialect/OpenMP/invalid.mlir 2024-06-12 10:43:15.360181687 -0500 -+++ llvm-project/mlir/test/Dialect/OpenMP/invalid.mlir 2024-06-12 10:44:09.359614154 -0500 -@@ -11,8 +11,8 @@ - // ----- - - func.func @not_wrapper() { -+ // expected-error@+1 {{op must be a loop wrapper}} - omp.distribute { -- // expected-error@+1 {{op must take a loop wrapper role if nested inside of 'omp.distribute'}} - omp.parallel { - %0 = arith.constant 0 : i32 - omp.terminator -@@ -363,12 +363,16 @@ - - // ----- - --func.func @omp_simd_nested_wrapper() -> () { -+func.func @omp_simd_nested_wrapper(%lb : index, %ub : index, %step : index) -> () { - // expected-error @below {{op must wrap an 'omp.loop_nest' directly}} - omp.simd { - omp.distribute { -+ omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) { -+ omp.yield -+ } - omp.terminator - } -+ omp.terminator - } - return - } -@@ -1359,24 +1363,18 @@ - // ----- - - func.func @omp_teams_num_teams1(%lb : i32) { -- omp.target { -- // expected-error @below {{expected num_teams upper bound to be defined if the lower bound is defined}} -- "omp.teams" (%lb) ({ -- omp.terminator -- }) {operandSegmentSizes = array} : (i32) -> () -+ // expected-error @below {{expected num_teams upper bound to be defined if the lower bound is defined}} -+ "omp.teams" (%lb) ({ - omp.terminator -- } -+ }) {operandSegmentSizes = array} : (i32) -> () - return - } - - // ----- - - func.func @omp_teams_num_teams2(%lb : i32, %ub : i16) { -- omp.target { -- // expected-error @below {{expected num_teams upper bound and lower bound to be the same type}} -- omp.teams num_teams(%lb : i32 to %ub : i16) { -- omp.terminator -- } -+ // expected-error @below {{expected num_teams upper bound and lower bound to be the same type}} -+ omp.teams num_teams(%lb : i32 to %ub : i16) { - omp.terminator - } - return -@@ -1920,6 +1918,7 @@ - } - omp.terminator - } -+ omp.terminator - } - return - } -@@ -2084,7 +2083,7 @@ - // expected-error @below {{op expected as many depend values as depend variables}} - "omp.target"(%data_var) ({ - "omp.terminator"() : () -> () -- }) {depends = [], operandSegmentSizes = array} : (memref) -> () -+ }) {depends = 
[], operandSegmentSizes = array} : (memref) -> () - "func.return"() : () -> () - } - -@@ -2118,11 +2117,13 @@ - - // ----- - --func.func @omp_distribute_nested_wrapper(%data_var : memref) -> () { -+func.func @omp_distribute_nested_wrapper(%lb: index, %ub: index, %step: index) -> () { - // expected-error @below {{only supported nested wrappers are 'omp.parallel' and 'omp.simd'}} - omp.distribute { - "omp.wsloop"() ({ -- %0 = arith.constant 0 : i32 -+ omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) { -+ "omp.yield"() : () -> () -+ } - "omp.terminator"() : () -> () - }) : () -> () - "omp.terminator"() : () -> () -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Dialect/OpenMP/ops.mlir llvm-project/mlir/test/Dialect/OpenMP/ops.mlir ---- llvm-project.orig/mlir/test/Dialect/OpenMP/ops.mlir 2024-06-12 10:43:15.360181687 -0500 -+++ llvm-project/mlir/test/Dialect/OpenMP/ops.mlir 2024-06-12 10:44:09.359614154 -0500 -@@ -601,6 +601,7 @@ - omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) { - omp.yield - } -+ omp.terminator - } - return - } -@@ -616,6 +617,7 @@ - omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) { - omp.yield - } -+ omp.terminator - } - return - } -@@ -627,6 +629,7 @@ - omp.loop_nest (%iv): index = (%lb) to (%ub) step (%step) { - omp.yield - } -+ omp.terminator - } - return - } -@@ -640,6 +643,7 @@ - omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) { - omp.yield - } -+ omp.terminator - } - return - } -@@ -651,6 +655,7 @@ - omp.loop_nest (%iv): index = (%lb) to (%ub) step (%step) { - omp.yield - } -+ omp.terminator - } - return - } -@@ -662,6 +667,7 @@ - omp.loop_nest (%iv): index = (%lb) to (%ub) step (%step) { - omp.yield - } -+ omp.terminator - } - return - } -@@ -673,6 +679,7 @@ - omp.loop_nest (%iv): index = (%lb) to (%ub) step (%step) { - omp.yield - } -+ omp.terminator - } - return - } -@@ -692,30 +699,35 @@ - omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { - omp.yield - } -+ omp.terminator - } - // CHECK: omp.distribute dist_schedule_static - omp.distribute dist_schedule_static { - omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { - omp.yield - } -+ omp.terminator - } - // CHECK: omp.distribute dist_schedule_static chunk_size(%{{.+}} : i32) - omp.distribute dist_schedule_static chunk_size(%chunk_size : i32) { - omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { - omp.yield - } -+ omp.terminator - } - // CHECK: omp.distribute order(concurrent) - omp.distribute order(concurrent) { - omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { - omp.yield - } -+ omp.terminator - } - // CHECK: omp.distribute allocate(%{{.+}} : memref -> %{{.+}} : memref) - omp.distribute allocate(%data_var : memref -> %data_var : memref) { - omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { - omp.yield - } -+ omp.terminator - } - // CHECK: omp.distribute - omp.distribute { -@@ -723,7 +735,9 @@ - omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { - omp.yield - } -+ omp.terminator - } -+ omp.terminator - } - return - } -@@ -737,7 +751,7 @@ - "omp.target"(%if_cond, %device, %num_threads) ({ - // CHECK: omp.terminator - omp.terminator -- }) {nowait, operandSegmentSizes = array} : ( i1, si32, i32 ) -> () -+ }) {nowait, operandSegmentSizes = array} : ( i1, si32, i32 ) -> () +@@ -4394,11 +4988,10 @@ - // Test with optional map clause. 
- // CHECK: %[[MAP_A:.*]] = omp.map.info var_ptr(%[[VAL_1:.*]] : memref, tensor) map_clauses(tofrom) capture(ByRef) -> memref {name = ""} -@@ -2201,6 +2215,7 @@ - // CHECK: omp.yield - omp.yield - } -+ omp.terminator + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + if (ompBuilder->Config.isTargetDevice()) { +- if (isTargetDeviceOp(op)) { ++ if (isTargetDeviceOp(op)) + return convertTargetDeviceOp(op, builder, moduleTranslation); +- } else { ++ else + return convertTargetOpsInNest(op, builder, moduleTranslation); +- } } + return convertHostOrTargetOperation(op, builder, moduleTranslation); + } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Dialect/OpenMP/invalid.mlir llvm-project-aso/mlir/test/Dialect/OpenMP/invalid.mlir +--- llvm-project-aso-orig/mlir/test/Dialect/OpenMP/invalid.mlir 2024-11-23 20:25:27.503272791 -0600 ++++ llvm-project-aso/mlir/test/Dialect/OpenMP/invalid.mlir 2024-11-23 20:39:47.200175294 -0600 +@@ -1391,24 +1391,18 @@ + // ----- - %testbool = "test.bool"() : () -> (i1) -@@ -2211,6 +2226,7 @@ - // CHECK: omp.yield - omp.yield - } -+ omp.terminator - } + func.func @omp_teams_num_teams1(%lb : i32) { +- omp.target { +- // expected-error @below {{expected num_teams upper bound to be defined if the lower bound is defined}} +- "omp.teams" (%lb) ({ +- omp.terminator +- }) {operandSegmentSizes = array} : (i32) -> () ++ // expected-error @below {{expected num_teams upper bound to be defined if the lower bound is defined}} ++ "omp.teams" (%lb) ({ + omp.terminator +- } ++ }) {operandSegmentSizes = array} : (i32) -> () + return + } - // CHECK: omp.taskloop final(%{{[^)]+}}) { -@@ -2219,6 +2235,7 @@ - // CHECK: omp.yield - omp.yield - } -+ omp.terminator - } + // ----- - // CHECK: omp.taskloop untied { -@@ -2227,6 +2244,7 @@ - // CHECK: omp.yield - omp.yield - } -+ omp.terminator + func.func @omp_teams_num_teams2(%lb : i32, %ub : i16) { +- omp.target { +- // expected-error @below {{expected num_teams upper bound and lower bound to be the same type}} +- omp.teams num_teams(%lb : i32 to %ub : i16) { +- omp.terminator +- } ++ // expected-error @below {{expected num_teams upper bound and lower bound to be the same type}} ++ omp.teams num_teams(%lb : i32 to %ub : i16) { + omp.terminator } + return +@@ -2138,11 +2132,80 @@ - // CHECK: omp.taskloop mergeable { -@@ -2235,6 +2253,7 @@ - // CHECK: omp.yield - omp.yield - } -+ omp.terminator - } + // ----- - %testf32 = "test.f32"() : () -> (!llvm.ptr) -@@ -2245,6 +2264,7 @@ - // CHECK: omp.yield - omp.yield - } ++func.func @omp_target_multiple_teams() { ++ // expected-error @below {{target containing multiple teams constructs}} ++ omp.target { ++ omp.teams { ++ omp.terminator ++ } ++ omp.teams { ++ omp.terminator ++ } + omp.terminator - } - - // CHECK: omp.taskloop reduction(@add_f32 -> %{{.+}} : !llvm.ptr, @add_f32 -> %{{.+}} : !llvm.ptr) { -@@ -2253,6 +2273,7 @@ - // CHECK: omp.yield - omp.yield - } ++ } ++ return ++} ++ ++// ----- ++ ++func.func @omp_target_host_eval1(%x : !llvm.ptr) { ++ // expected-error @below {{op host_eval argument illegal use in 'llvm.load' operation}} ++ omp.target host_eval(%x -> %arg0 : !llvm.ptr) { ++ %0 = llvm.load %arg0 : !llvm.ptr -> f32 + omp.terminator - } - - // CHECK: omp.taskloop in_reduction(@add_f32 -> %{{.+}} : !llvm.ptr) reduction(@add_f32 -> %{{.+}} : !llvm.ptr) { -@@ -2261,6 +2282,7 @@ - // CHECK: omp.yield - omp.yield - } ++ } ++ return ++} ++ ++// ----- ++ ++func.func @omp_target_host_eval2(%x : i1) { ++ // expected-error @below {{op host_eval argument 
only legal as 'num_teams' and 'thread_limit' in 'omp.teams'}} ++ omp.target host_eval(%x -> %arg0 : i1) { ++ omp.teams if(%arg0) { ++ omp.terminator ++ } + omp.terminator - } - - %testi32 = "test.i32"() : () -> (i32) -@@ -2270,6 +2292,7 @@ - // CHECK: omp.yield - omp.yield - } ++ } ++ return ++} ++ ++// ----- ++ ++func.func @omp_target_host_eval3(%x : i32) { ++ // expected-error @below {{op host_eval argument only legal as 'num_threads' in 'omp.parallel' when representing target SPMD}} ++ omp.target host_eval(%x -> %arg0 : i32) { ++ omp.parallel num_threads(%arg0 : i32) { ++ omp.terminator ++ } + omp.terminator - } - - %testmemref = "test.memref"() : () -> (memref) -@@ -2279,6 +2302,7 @@ - // CHECK: omp.yield - omp.yield - } ++ } ++ return ++} ++ ++// ----- ++ ++func.func @omp_target_host_eval3(%x : i32) { ++ // expected-error @below {{op host_eval argument only legal as loop bounds and steps in 'omp.loop_nest' when representing target SPMD}} ++ omp.target host_eval(%x -> %arg0 : i32) { ++ omp.wsloop { ++ omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { ++ omp.yield ++ } ++ omp.terminator ++ } + omp.terminator - } ++ } ++ return ++} ++ ++// ----- ++ + func.func @omp_target_depend(%data_var: memref) { + // expected-error @below {{op expected as many depend values as depend variables}} + "omp.target"(%data_var) ({ + "omp.terminator"() : () -> () +- }) {depend_kinds = [], operandSegmentSizes = array} : (memref) -> () ++ }) {depend_kinds = [], operandSegmentSizes = array} : (memref) -> () + "func.return"() : () -> () + } - %testi64 = "test.i64"() : () -> (i64) -@@ -2288,6 +2312,7 @@ - // CHECK: omp.yield - omp.yield - } -+ omp.terminator - } +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Dialect/OpenMP/ops.mlir llvm-project-aso/mlir/test/Dialect/OpenMP/ops.mlir +--- llvm-project-aso-orig/mlir/test/Dialect/OpenMP/ops.mlir 2024-11-23 20:25:27.503272791 -0600 ++++ llvm-project-aso/mlir/test/Dialect/OpenMP/ops.mlir 2024-11-23 20:39:47.200175294 -0600 +@@ -770,7 +770,7 @@ + "omp.target"(%device, %if_cond, %num_threads) ({ + // CHECK: omp.terminator + omp.terminator +- }) {nowait, operandSegmentSizes = array} : ( si32, i1, i32 ) -> () ++ }) {nowait, operandSegmentSizes = array} : ( si32, i1, i32 ) -> () - // CHECK: omp.taskloop num_tasks(%{{[^:]+}}: i64) { -@@ -2296,6 +2321,7 @@ - // CHECK: omp.yield - omp.yield - } -+ omp.terminator - } + // Test with optional map clause. 
+ // CHECK: %[[MAP_A:.*]] = omp.map.info var_ptr(%[[VAL_1:.*]] : memref, tensor) map_clauses(tofrom) capture(ByRef) -> memref {name = ""} +@@ -2750,6 +2750,42 @@ + return + } - // CHECK: omp.taskloop nogroup { -@@ -2304,6 +2330,7 @@ - // CHECK: omp.yield - omp.yield - } ++func.func @omp_target_host_eval(%x : i32) { ++ // CHECK: omp.target host_eval(%{{.*}} -> %[[HOST_ARG:.*]] : i32) { ++ // CHECK: omp.teams num_teams( to %[[HOST_ARG]] : i32) ++ // CHECK-SAME: thread_limit(%[[HOST_ARG]] : i32) ++ omp.target host_eval(%x -> %arg0 : i32) { ++ omp.teams num_teams(to %arg0 : i32) thread_limit(%arg0 : i32) { ++ omp.terminator ++ } + omp.terminator - } - - // CHECK: omp.taskloop { -@@ -2313,7 +2340,9 @@ - // CHECK: omp.yield - omp.yield - } ++ } ++ ++ // CHECK: omp.target host_eval(%{{.*}} -> %[[HOST_ARG:.*]] : i32) { ++ // CHECK: omp.teams ++ // CHECK: omp.parallel num_threads(%[[HOST_ARG]] : i32) { ++ // CHECK: omp.distribute { ++ // CHECK: omp.wsloop { ++ // CHECK: omp.loop_nest (%{{.*}}) : i32 = (%[[HOST_ARG]]) to (%[[HOST_ARG]]) step (%[[HOST_ARG]]) { ++ omp.target host_eval(%x -> %arg0 : i32) { ++ omp.teams { ++ omp.parallel num_threads(%arg0 : i32) { ++ omp.distribute { ++ omp.wsloop { ++ omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { ++ omp.yield ++ } ++ } {omp.composite} ++ } {omp.composite} ++ omp.terminator ++ } {omp.composite} + omp.terminator - } ++ } + omp.terminator - } - - // CHECK: return -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir llvm-project/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir ---- llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir 2024-06-12 10:43:15.400181266 -0500 -+++ llvm-project/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir 2024-06-12 10:44:09.359614154 -0500 ++ } ++ return ++} ++ + // CHECK-LABEL: omp_loop + func.func @omp_loop(%lb : index, %ub : index, %step : index) { + // CHECK: omp.loop { +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir 2024-10-18 17:40:33.932977650 -0500 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir 2024-11-23 20:39:47.200175294 -0600 @@ -1,6 +1,6 @@ // RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s @@ -14491,7 +14172,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge llvm.func @_QQmain() attributes {fir.bindc_name = "main"} { %0 = llvm.mlir.addressof @_QFEi : !llvm.ptr %1 = llvm.mlir.addressof @_QFEsp : !llvm.ptr -@@ -24,7 +24,7 @@ +@@ -23,7 +23,7 @@ } } @@ -14500,9 +14181,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge // CHECK: entry: // CHECK: %[[ALLOCA_BYREF:.*]] = alloca ptr, align 8 -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir llvm-project/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir ---- llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir 2024-06-12 10:43:15.400181266 -0500 -+++ llvm-project/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir 2024-06-12 10:44:09.359614154 -0500 +diff -Naur -x .git -x '*.pyc' 
llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir 2024-10-18 17:40:33.932977650 -0500 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir 2024-11-23 20:39:47.200175294 -0600 @@ -10,7 +10,7 @@ // constant sized) allocations performs its task reasonably in these // scenarios. @@ -14512,7 +14193,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge llvm.func @_QQmain() attributes {omp.declare_target = #omp.declaretarget} { %1 = llvm.mlir.constant(1 : i64) : i64 %2 = llvm.alloca %1 x !llvm.struct<(ptr)> : (i64) -> !llvm.ptr -@@ -34,7 +34,7 @@ +@@ -33,7 +33,7 @@ llvm.func @_ExternalCall(!llvm.ptr, !llvm.ptr) -> !llvm.struct<()> } @@ -14521,9 +14202,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge // CHECK-NEXT: entry: // CHECK-NEXT: %[[MOVED_ALLOCA1:.*]] = alloca { ptr }, align 8 // CHECK-NEXT: %[[MOVED_ALLOCA2:.*]] = alloca i32, i64 1, align 4 -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir llvm-project/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir ---- llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir 2024-06-12 10:43:15.400181266 -0500 -+++ llvm-project/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir 2024-06-12 10:44:09.359614154 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir 2024-10-18 17:40:33.936977609 -0500 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir 2024-11-23 20:39:47.200175294 -0600 @@ -1,6 +1,6 @@ // RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s @@ -14532,7 +14213,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge llvm.func @_QQmain() attributes {bindc_name = "main"} { %0 = llvm.mlir.addressof @_QFEsp : !llvm.ptr %1 = llvm.mlir.constant(10 : index) : i64 -@@ -31,7 +31,7 @@ +@@ -30,7 +30,7 @@ } @@ -14541,9 +14222,20 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge // CHECK: %[[ARG1_ALLOCA:.*]] = alloca ptr, align 8 // CHECK: store ptr %[[ARG1]], ptr %[[ARG1_ALLOCA]], align 8 -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir llvm-project/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir ---- llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir 2024-06-12 10:43:15.400181266 -0500 -+++ llvm-project/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir 2024-06-12 10:44:09.359614154 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-debug.mlir llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-debug.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-debug.mlir 2024-10-18 17:40:33.936977609 -0500 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-debug.mlir 2024-11-23 20:39:47.200175294 -0600 +@@ -1,6 +1,6 @@ + // RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + 
+-module attributes {omp.is_target_device = true} { ++module attributes {omp.is_target_device = true, llvm.target_triple = "amdgcn-amd-amdhsa"} { + llvm.func @_QQmain() { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir 2024-10-18 17:40:33.936977609 -0500 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir 2024-11-23 20:39:47.200175294 -0600 @@ -7,7 +7,7 @@ // Unfortunately, only so much can be tested as the device side is dependent on a *.bc // file created by the host and appended as an attribute to the module. @@ -14553,10 +14245,393 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge // CHECK-DAG: @_QMtest_0Esp_decl_tgt_ref_ptr = weak global ptr null, align 8 llvm.mlir.global external @_QMtest_0Esp() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget} : i32 { %0 = llvm.mlir.constant(0 : i32) : i32 -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir llvm-project/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir ---- llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir 2024-06-12 10:43:15.400181266 -0500 -+++ llvm-project/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir 2024-06-12 10:44:09.359614154 -0500 -@@ -55,7 +55,7 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-record-type-mapping-host.mlir llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-record-type-mapping-host.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-record-type-mapping-host.mlir 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-record-type-mapping-host.mlir 2024-11-23 20:39:47.204175279 -0600 +@@ -0,0 +1,329 @@ ++// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s ++ ++// This test checks the offload sizes, map types and base pointers and pointers ++// provided to the OpenMP kernel argument structure are correct when lowering ++// to LLVM-IR from MLIR when performing explicit member mapping of a record type ++// that includes fortran allocatables in various locations of the record types ++// hierarchy. 
++ ++module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} { ++ llvm.func @omp_map_derived_type_allocatable_member(%arg0: !llvm.ptr) { ++ %0 = llvm.mlir.constant(4 : index) : i64 ++ %1 = llvm.mlir.constant(1 : index) : i64 ++ %2 = llvm.mlir.constant(0 : index) : i64 ++ %3 = omp.map.bounds lower_bound(%2 : i64) upper_bound(%0 : i64) extent(%0 : i64) stride(%1 : i64) start_idx(%2 : i64) {stride_in_bytes = true} ++ %4 = llvm.getelementptr %arg0[0, 4] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_derived_type_allocatable_map_operand_and_block_additionTone_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)> ++ %5 = llvm.getelementptr %4[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> ++ %6 = omp.map.info var_ptr(%4 : !llvm.ptr, i32) var_ptr_ptr(%5 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%3) -> !llvm.ptr {name = ""} ++ %7 = omp.map.info var_ptr(%4 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "one_l%array_j"} ++ %8 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<"_QFtest_derived_type_allocatable_map_operand_and_block_additionTone_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>) map_clauses(tofrom) capture(ByRef) members(%7, %6 : [4,-1], [4,0] : !llvm.ptr, !llvm.ptr) -> !llvm.ptr {name = "one_l", partial_map = true} ++ omp.target map_entries(%7 -> %arg1, %6 -> %arg2, %8 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { ++ omp.terminator ++ } ++ llvm.return ++ } ++ ++ llvm.func @omp_allocatable_derived_type_member_map(%arg0: !llvm.ptr) { ++ %0 = llvm.mlir.constant(1 : i32) : i32 ++ %1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr ++ %2 = llvm.mlir.constant(1 : i32) : i32 ++ %3 = llvm.alloca %2 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr ++ %4 = llvm.mlir.constant(5 : index) : i64 ++ %5 = llvm.mlir.constant(4 : index) : i64 ++ %6 = llvm.mlir.constant(1 : index) : i64 ++ %7 = llvm.mlir.constant(0 : index) : i64 ++ %8 = omp.map.bounds lower_bound(%7 : i64) upper_bound(%5 : i64) extent(%5 : i64) stride(%6 : i64) start_idx(%7 : i64) {stride_in_bytes = true} ++ %9 = llvm.load %arg0 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> ++ llvm.store %9, %3 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>, !llvm.ptr ++ %10 = llvm.getelementptr %3[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> ++ %11 = llvm.load %10 : !llvm.ptr -> !llvm.ptr ++ %12 = llvm.getelementptr %11[0, 4] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_allocatable_derived_type_map_operand_and_block_additionTone_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)> ++ %13 = llvm.getelementptr %12[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> ++ %14 = omp.map.info var_ptr(%12 : !llvm.ptr, i32) var_ptr_ptr(%13 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%8) -> !llvm.ptr {name = ""} ++ %15 = omp.map.info 
var_ptr(%12 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "one_l%array_j"} ++ %16 = llvm.load %arg0 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> ++ llvm.store %16, %1 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>, !llvm.ptr ++ %17 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> ++ %18 = llvm.load %17 : !llvm.ptr -> !llvm.ptr ++ %19 = llvm.getelementptr %18[0, 5] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_allocatable_derived_type_map_operand_and_block_additionTone_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)> ++ %20 = omp.map.info var_ptr(%19 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "one_l%k"} ++ %21 = llvm.getelementptr %arg0[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> ++ %22 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<"_QFtest_allocatable_derived_type_map_operand_and_block_additionTone_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>) var_ptr_ptr(%21 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} ++ %23 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>) map_clauses(tofrom) capture(ByRef) members(%22, %15, %14, %20 : [0,-1,-1], [0,4,-1], [0,4,0], [0,5,-1] : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) -> !llvm.ptr {name = "one_l"} ++ omp.target map_entries(%22 -> %arg1, %15 -> %arg2, %14 -> %arg3, %20 -> %arg4, %23 -> %arg5 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) { ++ omp.terminator ++ } ++ llvm.return ++ } ++ ++ llvm.func @omp_alloca_nested_derived_type_map(%arg0: !llvm.ptr) { ++ %0 = llvm.mlir.constant(1 : i32) : i32 ++ %1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr ++ %2 = llvm.mlir.constant(1 : i32) : i32 ++ %3 = llvm.alloca %2 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr ++ %4 = llvm.mlir.constant(3 : index) : i64 ++ %5 = llvm.mlir.constant(4 : index) : i64 ++ %6 = llvm.mlir.constant(6 : index) : i64 ++ %7 = llvm.mlir.constant(1 : index) : i64 ++ %8 = llvm.mlir.constant(2 : index) : i64 ++ %9 = llvm.mlir.constant(0 : index) : i64 ++ %10 = omp.map.bounds lower_bound(%9 : i64) upper_bound(%5 : i64) extent(%5 : i64) stride(%7 : i64) start_idx(%9 : i64) {stride_in_bytes = true} ++ %11 = llvm.load %arg0 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> ++ llvm.store %11, %3 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>, !llvm.ptr ++ %12 = llvm.getelementptr %3[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> ++ %13 = llvm.load %12 : !llvm.ptr -> !llvm.ptr ++ %14 = llvm.getelementptr %13[0, 6] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTtop_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32, 
struct<"_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>)> ++ %15 = llvm.getelementptr %14[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)> ++ %16 = llvm.getelementptr %15[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> ++ %17 = omp.map.info var_ptr(%15 : !llvm.ptr, i32) var_ptr_ptr(%16 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%10) -> !llvm.ptr {name = ""} ++ %18 = omp.map.info var_ptr(%15 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "one_l%nest%array_k"} ++ %19 = llvm.load %arg0 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> ++ llvm.store %19, %1 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>, !llvm.ptr ++ %20 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> ++ %21 = llvm.load %20 : !llvm.ptr -> !llvm.ptr ++ %22 = llvm.getelementptr %21[0, 6] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTtop_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32, struct<"_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>)> ++ %23 = llvm.getelementptr %22[0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)> ++ %24 = omp.map.info var_ptr(%23 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "one_l%nest%k"} ++ %25 = llvm.getelementptr %arg0[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> ++ %26 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<"_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTtop_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32, struct<"_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>)>) var_ptr_ptr(%25 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} ++ %27 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>) map_clauses(tofrom) capture(ByRef) members(%26, %18, %17, %24 : [0,-1,-1,-1], [0,6,2,-1], [0,6,2,0], [0,6,3,-1] : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) -> !llvm.ptr {name = "one_l"} ++ omp.target map_entries(%26 -> %arg1, %18 -> %arg2, %17 -> %arg3, %24 -> %arg4, %27 -> %arg5 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) { ++ omp.terminator ++ } ++ llvm.return ++ } ++ ++ llvm.func @omp_nested_derived_type_alloca_map(%arg0: !llvm.ptr) { ++ %0 = llvm.mlir.constant(4 : index) : i64 ++ %1 = llvm.mlir.constant(1 : index) : i64 
++ %2 = llvm.mlir.constant(2 : index) : i64 ++ %3 = llvm.mlir.constant(0 : index) : i64 ++ %4 = llvm.mlir.constant(6 : index) : i64 ++ %5 = omp.map.bounds lower_bound(%3 : i64) upper_bound(%0 : i64) extent(%0 : i64) stride(%1 : i64) start_idx(%3 : i64) {stride_in_bytes = true} ++ %6 = llvm.getelementptr %arg0[0, 6] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTtop_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32, struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>)> ++ %7 = llvm.getelementptr %6[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)> ++ %8 = llvm.getelementptr %7[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> ++ %9 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) var_ptr_ptr(%8 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%5) -> !llvm.ptr {name = ""} ++ %10 = omp.map.info var_ptr(%7 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "one_l%nest%array_k"} ++ %11 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTtop_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32, struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>)>) map_clauses(tofrom) capture(ByRef) members(%10, %9 : [6,2,-1], [6,2,0] : !llvm.ptr, !llvm.ptr) -> !llvm.ptr {name = "one_l", partial_map = true} ++ omp.target map_entries(%10 -> %arg1, %9 -> %arg2, %11 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { ++ omp.terminator ++ } ++ llvm.return ++ } ++} ++ ++// CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [4 x i64] [i64 0, i64 48, i64 8, i64 20] ++// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710659, i64 281474976710659, i64 281474976710675] ++// CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [8 x i64] [i64 0, i64 40, i64 8, i64 136, i64 48, i64 8, i64 20, i64 4] ++// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [8 x i64] [i64 32, i64 281474976710659, i64 281474976710659, i64 281474976710675, i64 281474976710659, i64 281474976710659, i64 281474976710675, i64 281474976710659] ++// CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [8 x i64] [i64 0, i64 40, i64 8, i64 240, i64 48, i64 8, i64 20, i64 4] ++// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [8 x i64] [i64 32, i64 281474976710659, i64 281474976710659, i64 281474976710675, i64 281474976710659, i64 281474976710659, i64 281474976710675, i64 281474976710659] ++// CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [4 x i64] [i64 0, i64 48, i64 8, i64 20] ++// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710659, i64 281474976710659, i64 281474976710675] ++ ++// CHECK: 
define void @omp_map_derived_type_allocatable_member(ptr %[[ARG:.*]]) { ++ ++// CHECK: %[[DTYPE_ALLOCATABLE_MEMBER_GEP:.*]] = getelementptr %_QFtest_derived_type_allocatable_map_operand_and_block_additionTone_layer, ptr %[[ARG]], i32 0, i32 4 ++// CHECK: %[[ALLOCATABLE_MEMBER_BADDR:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[DTYPE_ALLOCATABLE_MEMBER_GEP]], i32 0, i32 0 ++ ++// CHECK: %[[LOAD_ALLOCATABLE_MEMBER_BADDR:.*]] = load ptr, ptr %[[ALLOCATABLE_MEMBER_BADDR]], align 8 ++// CHECK: %[[ARR_OFFSET:.*]] = getelementptr inbounds i32, ptr %[[LOAD_ALLOCATABLE_MEMBER_BADDR]], i64 0 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_1:.*]] = getelementptr i32, ptr %[[ARR_OFFSET]], i64 1 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_2:.*]] = ptrtoint ptr %[[DTYPE_SIZE_SEGMENT_CALC_1]] to i64 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_3:.*]] = ptrtoint ptr %[[DTYPE_ALLOCATABLE_MEMBER_GEP]] to i64 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_4:.*]] = sub i64 %[[DTYPE_SIZE_SEGMENT_CALC_2]], %[[DTYPE_SIZE_SEGMENT_CALC_3]] ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_5:.*]] = sdiv exact i64 %[[DTYPE_SIZE_SEGMENT_CALC_4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 0 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 0 ++// CHECK: store ptr %[[DTYPE_ALLOCATABLE_MEMBER_GEP]], ptr %[[OFFLOAD_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 0 ++// CHECK: store i64 %[[DTYPE_SIZE_SEGMENT_CALC_5]], ptr %[[OFFLOAD_SIZES]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 1 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 1 ++// CHECK: store ptr %[[DTYPE_ALLOCATABLE_MEMBER_GEP]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 2 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 2 ++// CHECK: store ptr %[[ALLOCATABLE_MEMBER_BADDR]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 3 ++// CHECK: store ptr %[[ALLOCATABLE_MEMBER_BADDR]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 3 ++// CHECK: store ptr %[[ARR_OFFSET]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: define void @omp_allocatable_derived_type_member_map(ptr %[[ARG:.*]]) { ++ ++// CHECK: %[[LOCAL_ALLOCATABLE_DTYPE_ALLOCA_2:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, align 8 ++// CHECK: %[[LOCAL_ALLOCATABLE_DTYPE_ALLOCA:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, align 8 ++// CHECK: %[[LOAD_DTYPE_ALLOCATABLE_ARG:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[ARG]], align 8 ++// CHECK: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOAD_DTYPE_ALLOCATABLE_ARG]], ptr %[[LOCAL_ALLOCATABLE_DTYPE_ALLOCA]], align 8 ++// CHECK: %[[DTYPE_ALLOCATABLE_BADDR_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr 
%[[LOCAL_ALLOCATABLE_DTYPE_ALLOCA]], i32 0, i32 0 ++// CHECK: %[[DTYPE_ALLOCATABLE_BADDR_LOAD:.*]] = load ptr, ptr %[[DTYPE_ALLOCATABLE_BADDR_GEP]], align 8 ++// CHECK: %[[DTYPE_ALLOCATABLE_MEMBER_ACCESS:.*]] = getelementptr %_QFtest_allocatable_derived_type_map_operand_and_block_additionTone_layer, ptr %[[DTYPE_ALLOCATABLE_BADDR_LOAD]], i32 0, i32 4 ++// CHECK: %[[DTYPE_ALLOCATABLE_MEMBER_BADDR:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[DTYPE_ALLOCATABLE_MEMBER_ACCESS]], i32 0, i32 0 ++// CHECK: %[[LOAD_DTYPE_ALLOCATABLE_ARG:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[ARG]], align 8 ++// CHECK: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOAD_DTYPE_ALLOCATABLE_ARG]], ptr %[[LOCAL_ALLOCATABLE_DTYPE_ALLOCA_2]], align 8 ++// CHECK: %[[DTYPE_ALLOCATABLE_BADDR_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[LOCAL_ALLOCATABLE_DTYPE_ALLOCA_2]], i32 0, i32 0 ++// CHECK: %[[DTYPE_ALLOCATABLE_BADDR_LOAD:.*]] = load ptr, ptr %[[DTYPE_ALLOCATABLE_BADDR_GEP]], align 8 ++// CHECK: %[[DTYPE_REGULAR_MEMBER_ACCESS:.*]] = getelementptr %_QFtest_allocatable_derived_type_map_operand_and_block_additionTone_layer, ptr %[[DTYPE_ALLOCATABLE_BADDR_LOAD]], i32 0, i32 5 ++// CHECK: %[[DTYPE_ALLOCATABLE_MEMBER_BADDR_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[ARG]], i32 0, i32 0 ++// CHECK: %[[DTYPE_ALLOCATABLE_MEMBER_BADDR_2_LOAD:.*]] = load ptr, ptr %[[DTYPE_ALLOCATABLE_MEMBER_BADDR_2]], align 8 ++// CHECK: %[[DTYPE_ALLOCATABLE_MEMBER_BADDR_LOAD:.*]] = load ptr, ptr %[[DTYPE_ALLOCATABLE_MEMBER_BADDR]], align 8 ++// CHECK: %[[ARR_OFFSET:.*]] = getelementptr inbounds i32, ptr %[[DTYPE_ALLOCATABLE_MEMBER_BADDR_LOAD]], i64 0 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[ARG]], i32 1 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_2:.*]] = ptrtoint ptr %[[DTYPE_SIZE_SEGMENT_CALC_1]] to i64 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_3:.*]] = ptrtoint ptr %[[ARG]] to i64 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_4:.*]] = sub i64 %[[DTYPE_SIZE_SEGMENT_CALC_2]], %[[DTYPE_SIZE_SEGMENT_CALC_3]] ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_5:.*]] = sdiv exact i64 %[[DTYPE_SIZE_SEGMENT_CALC_4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 0 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 0 ++// CHECK: store ptr %[[ARG]], ptr %[[OFFLOAD_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_SIZES:.*]] = getelementptr inbounds [8 x i64], ptr %.offload_sizes, i32 0, i32 0 ++// CHECK: store i64 %[[DTYPE_SIZE_SEGMENT_CALC_5]], ptr %[[OFFLOAD_SIZES]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 1 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 1 ++// CHECK: store ptr %[[ARG]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 2 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 2 ++// CHECK: store ptr %[[DTYPE_ALLOCATABLE_MEMBER_BADDR_2]], ptr 
%[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 3 ++// CHECK: store ptr %[[DTYPE_ALLOCATABLE_MEMBER_BADDR_2]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 3 ++// CHECK: store ptr %[[DTYPE_ALLOCATABLE_MEMBER_BADDR_2_LOAD]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 4 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 4 ++// CHECK: store ptr %[[DTYPE_ALLOCATABLE_MEMBER_ACCESS]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 5 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 5 ++// CHECK: store ptr %[[DTYPE_ALLOCATABLE_MEMBER_BADDR]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 6 ++// CHECK: store ptr %[[DTYPE_ALLOCATABLE_MEMBER_BADDR]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 6 ++// CHECK: store ptr %[[ARR_OFFSET]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 7 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 7 ++// CHECK: store ptr %[[DTYPE_REGULAR_MEMBER_ACCESS]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: define void @omp_alloca_nested_derived_type_map(ptr %[[ARG:.*]]) { ++ ++// CHECK: %[[LOCAL_ALLOCATABLE_DTYPE_ALLOCA_2:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, align 8 ++// CHECK: %[[LOCAL_ALLOCATABLE_DTYPE_ALLOCA:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, align 8 ++// CHECK: %[[LOAD_DTYPE_ALLOCATABLE_ARG:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[ARG]], align 8 ++// CHECK: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOAD_DTYPE_ALLOCATABLE_ARG]], ptr %[[LOCAL_ALLOCATABLE_DTYPE_ALLOCA]], align 8 ++// CHECK: %[[DTYPE_BADDR_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[LOCAL_ALLOCATABLE_DTYPE_ALLOCA]], i32 0, i32 0 ++// CHECK: %[[DTYPE_BADDR_LOAD:.*]] = load ptr, ptr %[[DTYPE_BADDR_GEP]], align 8 ++// CHECK: %[[DTYPE_NESTED_DTYPE_MEMBER_GEP:.*]] = getelementptr %_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTtop_layer, ptr %[[DTYPE_BADDR_LOAD]], i32 0, i32 6 ++// CHECK: %[[DTYPE_NESTED_ALLOCATABLE_MEMBER_GEP:.*]] = getelementptr %_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTmiddle_layer, ptr %[[DTYPE_NESTED_DTYPE_MEMBER_GEP]], i32 0, i32 2 ++// CHECK: %[[DTYPE_NESTED_ALLOCATABLE_MEMBER_BADDR_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[DTYPE_NESTED_ALLOCATABLE_MEMBER_GEP]], i32 0, i32 0 ++// CHECK: %[[LOAD_DTYPE_ALLOCATABLE_ARG:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[ARG]], align 8 ++// CHECK: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOAD_DTYPE_ALLOCATABLE_ARG]], ptr %[[LOCAL_ALLOCATABLE_DTYPE_ALLOCA_2]], 
align 8 ++// CHECK: %[[DTYPE_BADDR_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[LOCAL_ALLOCATABLE_DTYPE_ALLOCA_2]], i32 0, i32 0 ++// CHECK: %[[DTYPE_BADDR_LOAD:.*]] = load ptr, ptr %[[DTYPE_BADDR_GEP]], align 8 ++// CHECK: %[[DTYPE_NESTED_DTYPE_MEMBER_GEP:.*]] = getelementptr %_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTtop_layer, ptr %[[DTYPE_BADDR_LOAD]], i32 0, i32 6 ++// CHECK: %[[DTYPE_NESTED_REGULAR_MEMBER_GEP:.*]] = getelementptr %_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTmiddle_layer, ptr %[[DTYPE_NESTED_DTYPE_MEMBER_GEP]], i32 0, i32 3 ++// CHECK: %[[DTYPE_ALLOCATABLE_BADDR_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[ARG]], i32 0, i32 0 ++// CHECK: %[[DTYPE_ALLOCATABLE_BADDR_LOAD:.*]] = load ptr, ptr %[[DTYPE_ALLOCATABLE_BADDR_GEP]], align 8 ++// CHECK: %[[DTYPE_NESTED_ALLOCATABLE_MEMBER_BADDR_LOAD:.*]] = load ptr, ptr %[[DTYPE_NESTED_ALLOCATABLE_MEMBER_BADDR_GEP]], align 8 ++// CHECK: %[[ARR_OFFSET:.*]] = getelementptr inbounds i32, ptr %[[DTYPE_NESTED_ALLOCATABLE_MEMBER_BADDR_LOAD]], i64 0 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[ARG]], i32 1 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_2:.*]] = ptrtoint ptr %[[DTYPE_SIZE_SEGMENT_CALC_1]] to i64 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_3:.*]] = ptrtoint ptr %[[ARG]] to i64 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_4:.*]] = sub i64 %[[DTYPE_SIZE_SEGMENT_CALC_2]], %[[DTYPE_SIZE_SEGMENT_CALC_3]] ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_5:.*]] = sdiv exact i64 %[[DTYPE_SIZE_SEGMENT_CALC_4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 0 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 0 ++// CHECK: store ptr %[[ARG]], ptr %[[OFFLOAD_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_SIZES:.*]] = getelementptr inbounds [8 x i64], ptr %.offload_sizes, i32 0, i32 0 ++// CHECK: store i64 %[[DTYPE_SIZE_SEGMENT_CALC_5]], ptr %[[OFFLOAD_SIZES]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 1 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 1 ++// CHECK: store ptr %[[ARG]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 2 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 2 ++// CHECK: store ptr %[[DTYPE_ALLOCATABLE_BADDR_GEP]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 3 ++// CHECK: store ptr %[[DTYPE_ALLOCATABLE_BADDR_GEP]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 3 ++// CHECK: store ptr %[[DTYPE_ALLOCATABLE_BADDR_LOAD]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 4 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr 
%.offload_ptrs, i32 0, i32 4 ++// CHECK: store ptr %[[DTYPE_NESTED_ALLOCATABLE_MEMBER_GEP]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 5 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 5 ++// CHECK: store ptr %[[DTYPE_NESTED_ALLOCATABLE_MEMBER_BADDR_GEP]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 6 ++// CHECK: store ptr %[[DTYPE_NESTED_ALLOCATABLE_MEMBER_BADDR_GEP]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 6 ++// CHECK: store ptr %[[ARR_OFFSET]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 7 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 7 ++// CHECK: store ptr %[[DTYPE_NESTED_REGULAR_MEMBER_GEP]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: define void @omp_nested_derived_type_alloca_map(ptr %[[ARG:.*]]) { ++ ++// CHECK: %[[NESTED_DTYPE_MEMBER_GEP:.*]] = getelementptr %_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTtop_layer, ptr %[[ARG]], i32 0, i32 6 ++// CHECK: %[[NESTED_ALLOCATABLE_MEMBER_GEP:.*]] = getelementptr %_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer, ptr %[[NESTED_DTYPE_MEMBER_GEP]], i32 0, i32 2 ++// CHECK: %[[NESTED_ALLOCATABLE_MEMBER_BADDR_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]], i32 0, i32 0 ++// CHECK: %[[NESTED_ALLOCATABLE_MEMBER_BADDR_LOAD:.*]] = load ptr, ptr %[[NESTED_ALLOCATABLE_MEMBER_BADDR_GEP]], align 8 ++// CHECK: %[[ARR_OFFSET:.*]] = getelementptr inbounds i32, ptr %[[NESTED_ALLOCATABLE_MEMBER_BADDR_LOAD]], i64 0 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_1:.*]] = getelementptr i32, ptr %[[ARR_OFFSET]], i64 1 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_2:.*]] = ptrtoint ptr %[[DTYPE_SIZE_SEGMENT_CALC_1]] to i64 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_3:.*]] = ptrtoint ptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]] to i64 ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_4:.*]] = sub i64 %[[DTYPE_SIZE_SEGMENT_CALC_2]], %[[DTYPE_SIZE_SEGMENT_CALC_3]] ++// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_5:.*]] = sdiv exact i64 %[[DTYPE_SIZE_SEGMENT_CALC_4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 0 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 0 ++// CHECK: store ptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]], ptr %[[OFFLOAD_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 0 ++// CHECK: store i64 %[[DTYPE_SIZE_SEGMENT_CALC_5]], ptr %[[OFFLOAD_SIZES]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 1 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 1 ++// CHECK: store ptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]], ptr %[[OFFLOAD_PTRS]], 
align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 2 ++// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 2 ++// CHECK: store ptr %[[NESTED_ALLOCATABLE_MEMBER_BADDR_GEP]], ptr %[[OFFLOAD_PTRS]], align 8 ++ ++// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 3 ++// CHECK: store ptr %[[NESTED_ALLOCATABLE_MEMBER_BADDR_GEP]], ptr %[[BASE_PTRS]], align 8 ++// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 3 ++// CHECK: store ptr %[[ARR_OFFSET]], ptr %[[OFFLOAD_PTRS]], align 8 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-host-eval.mlir llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-host-eval.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-host-eval.mlir 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-host-eval.mlir 2024-11-23 20:39:47.204175279 -0600 +@@ -0,0 +1,46 @@ ++// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s ++ ++module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} { ++ llvm.func @omp_target_region_() { ++ %out_teams = llvm.mlir.constant(1000 : i32) : i32 ++ %out_threads = llvm.mlir.constant(2000 : i32) : i32 ++ %out_lb = llvm.mlir.constant(0 : i32) : i32 ++ %out_ub = llvm.mlir.constant(3000 : i32) : i32 ++ %out_step = llvm.mlir.constant(1 : i32) : i32 ++ ++ omp.target ++ host_eval(%out_teams -> %teams, %out_threads -> %threads, ++ %out_lb -> %lb, %out_ub -> %ub, %out_step -> %step : ++ i32, i32, i32, i32, i32) { ++ omp.teams num_teams(to %teams : i32) thread_limit(%threads : i32) { ++ omp.parallel { ++ omp.distribute { ++ omp.wsloop { ++ omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) { ++ omp.yield ++ } ++ } {omp.composite} ++ } {omp.composite} ++ omp.terminator ++ } {omp.composite} ++ omp.terminator ++ } ++ omp.terminator ++ } ++ llvm.return ++ } ++} ++ ++// CHECK-LABEL: define void @omp_target_region_ ++// CHECK: %[[ARGS:.*]] = alloca %struct.__tgt_kernel_arguments ++ ++// CHECK: %[[TRIPCOUNT_ADDR:.*]] = getelementptr inbounds nuw %struct.__tgt_kernel_arguments, ptr %[[ARGS]], i32 0, i32 8 ++// CHECK: store i64 3000, ptr %[[TRIPCOUNT_ADDR]] ++ ++// CHECK: %[[TEAMS_ADDR:.*]] = getelementptr inbounds nuw %struct.__tgt_kernel_arguments, ptr %[[ARGS]], i32 0, i32 10 ++// CHECK: store [3 x i32] [i32 1000, i32 0, i32 0], ptr %[[TEAMS_ADDR]] ++ ++// CHECK: %[[THREADS_ADDR:.*]] = getelementptr inbounds nuw %struct.__tgt_kernel_arguments, ptr %[[ARGS]], i32 0, i32 11 ++// CHECK: store [3 x i32] [i32 2000, i32 0, i32 0], ptr %[[THREADS_ADDR]] ++ ++// CHECK: call i32 @__tgt_target_kernel(ptr @{{.*}}, i64 {{.*}}, i32 1000, i32 2000, ptr @{{.*}}, ptr %[[ARGS]]) +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir 2024-10-18 17:40:33.936977609 -0500 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir 2024-11-23 20:39:47.204175279 -0600 +@@ -52,7 +52,7 @@ } // CHECK: define weak_odr protected amdgpu_kernel void @[[FUNC0:.*]]( @@ -14565,7 +14640,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge // CHECK: %[[TMP1:.*]] = 
alloca [1 x ptr], align 8, addrspace(5) // CHECK: %[[TMP2:.*]] = addrspacecast ptr addrspace(5) %[[TMP1]] to ptr // CHECK: %[[STRUCTARG:.*]] = alloca { ptr }, align 8, addrspace(5) -@@ -68,8 +68,8 @@ +@@ -65,8 +65,8 @@ // CHECK: br i1 %[[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[WORKER_EXIT:.*]] // CHECK: %[[TMP6:.*]] = load ptr, ptr %[[TMP4]], align 8 // CHECK: %[[OMP_GLOBAL_THREAD_NUM:.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr)) @@ -14576,7 +14651,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge // CHECK: %[[TMP7:.*]] = getelementptr inbounds [1 x ptr], ptr %[[TMP2]], i64 0, i64 0 // CHECK: store ptr %[[STRUCTARG_ASCAST]], ptr %[[TMP7]], align 8 // CHECK: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 1, i32 -1, i32 -1, ptr @[[FUNC1:.*]], ptr null, ptr %[[TMP2]], i64 1) -@@ -99,7 +99,7 @@ +@@ -96,7 +96,7 @@ // is passed as a param to kmpc_parallel_51 function // CHECK: define weak_odr protected amdgpu_kernel void @{{.*}}( @@ -14585,9 +14660,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge // CHECK: store ptr %[[IFCOND_ARG2]], ptr %[[IFCOND_TMP1:.*]], align 8 // CHECK: %[[IFCOND_TMP2:.*]] = load i32, ptr %[[IFCOND_TMP1]], align 4 // CHECK: %[[IFCOND_TMP3:.*]] = icmp ne i32 %[[IFCOND_TMP2]], 0 -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir llvm-project/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir ---- llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir 2024-06-12 10:43:15.400181266 -0500 -+++ llvm-project/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir 2024-06-12 10:44:09.359614154 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir 2024-10-18 17:40:33.936977609 -0500 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir 2024-11-23 20:39:47.204175279 -0600 @@ -4,10 +4,11 @@ // for nested omp do loop inside omp target region @@ -14603,7 +14678,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge omp.parallel { %loop_ub = llvm.mlir.constant(9 : i32) : i32 %loop_lb = llvm.mlir.constant(0 : i32) : i32 -@@ -37,14 +38,14 @@ +@@ -36,14 +37,14 @@ // CHECK-SAME: ptr %[[ARG_PTR:.*]]) // CHECK-SAME: #[[ATTRS1:[0-9]+]] // CHECK: call void @__kmpc_for_static_loop_4u(ptr addrspacecast (ptr addrspace(1) @[[GLOB]] to ptr), @@ -14621,9 +14696,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge +// CHECK: attributes #[[ATTRS2]] = { // CHECK-SAME: "target-cpu"="gfx90a" // CHECK-SAME: "target-features"="+gfx9-insts,+wavefrontsize64" -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir llvm-project/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir ---- llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir 2024-06-12 10:43:15.400181266 -0500 -+++ llvm-project/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir 2024-06-12 10:44:09.359614154 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir 
llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir 2024-10-18 17:40:33.936977609 -0500 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir 2024-11-23 20:39:47.204175279 -0600 @@ -1,6 +1,6 @@ // RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s @@ -14632,7 +14707,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge llvm.func @omp_target_region_() { %0 = llvm.mlir.constant(20 : i32) : i32 %1 = llvm.mlir.constant(10 : i32) : i32 -@@ -30,8 +30,8 @@ +@@ -29,8 +29,8 @@ // CHECK: @[[SRC_LOC:.*]] = private unnamed_addr constant [23 x i8] c"{{[^"]*}}", align 1 // CHECK: @[[IDENT:.*]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[SRC_LOC]] }, align 8 // CHECK: @[[DYNA_ENV:.*]] = weak_odr protected global %struct.DynamicEnvironmentTy zeroinitializer @@ -14643,9 +14718,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge // CHECK: %[[TMP_A:.*]] = alloca ptr, align 8 // CHECK: store ptr %[[ADDR_A]], ptr %[[TMP_A]], align 8 // CHECK: %[[TMP_B:.*]] = alloca ptr, align 8 -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir llvm-project/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir ---- llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir 2024-06-12 10:43:15.400181266 -0500 -+++ llvm-project/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir 2024-06-12 10:44:09.359614154 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir 2024-10-18 17:40:33.936977609 -0500 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir 2024-11-23 20:39:47.204175279 -0600 @@ -1,6 +1,6 @@ // RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s @@ -14654,16 +14729,16 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge llvm.func @omp_target_region_() { %0 = llvm.mlir.constant(20 : i32) : i32 %1 = llvm.mlir.constant(10 : i32) : i32 -@@ -37,5 +37,5 @@ +@@ -36,5 +36,5 @@ } } -// CHECK: define weak_odr protected void @__omp_offloading_{{.*}}_{{.*}}_omp_target_region__l19 +// CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_{{.*}}_omp_target_region__l19 // CHECK: ret void -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir llvm-project/mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir ---- llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir 2024-06-12 10:43:15.400181266 -0500 -+++ llvm-project/mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir 2024-06-12 10:44:09.359614154 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir 2024-08-27 20:36:32.060104964 -0500 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir 2024-11-23 20:39:47.204175279 -0600 @@ -5,7 +5,9 @@ module attributes {omp.is_target_device = true} { @@ -14675,9 +14750,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge 
omp.teams { llvm.call @foo(%arg0) : (i32) -> () omp.terminator -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir llvm-project/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir ---- llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir 2024-06-12 10:43:15.400181266 -0500 -+++ llvm-project/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir 2024-06-12 10:44:09.359614154 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir 2024-10-18 17:40:33.936977609 -0500 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir 2024-11-23 20:39:47.204175279 -0600 @@ -4,7 +4,9 @@ // for nested omp do loop with collapse clause inside omp target region @@ -14689,7 +14764,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge %loop_ub = llvm.mlir.constant(99 : i32) : i32 %loop_lb = llvm.mlir.constant(0 : i32) : i32 %loop_step = llvm.mlir.constant(1 : index) : i32 -@@ -25,7 +27,7 @@ +@@ -24,7 +26,7 @@ // CHECK: define void @[[FUNC_COLLAPSED_WSLOOP:.*]](ptr %[[ARG0:.*]]) // CHECK: call void @__kmpc_for_static_loop_4u(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), @@ -14698,9 +14773,9 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge // CHECK-SAME: i32 %[[NUM_THREADS:.*]], i32 0) // CHECK: define internal void @[[COLLAPSED_WSLOOP_BODY_FN]](i32 %[[LOOP_CNT:.*]], ptr %[[LOOP_BODY_ARG:.*]]) -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir llvm-project/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir ---- llvm-project.orig/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir 2024-06-12 10:43:15.400181266 -0500 -+++ llvm-project/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir 2024-06-12 10:44:09.359614154 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir 2024-10-18 17:40:33.936977609 -0500 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir 2024-11-23 20:39:47.204175279 -0600 @@ -4,7 +4,9 @@ // for nested omp do loop inside omp target region @@ -14712,7 +14787,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge %loop_ub = llvm.mlir.constant(9 : i32) : i32 %loop_lb = llvm.mlir.constant(0 : i32) : i32 %loop_step = llvm.mlir.constant(1 : i32) : i32 -@@ -19,7 +21,9 @@ +@@ -18,7 +20,9 @@ llvm.return } @@ -14723,7 +14798,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge %loop_ub = llvm.mlir.constant(9 : i32) : i32 %loop_lb = llvm.mlir.constant(0 : i32) : i32 %loop_step = llvm.mlir.constant(1 : i32) : i32 -@@ -36,10 +40,10 @@ +@@ -34,10 +38,10 @@ // CHECK: define void @[[FUNC0:.*]](ptr %[[ARG0:.*]]) // CHECK: %[[STRUCTARG:.*]] = alloca { ptr }, align 8, addrspace(5) // CHECK: %[[STRUCTARG_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[STRUCTARG]] to ptr @@ -14737,7 +14812,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge // CHECK: define internal void @[[LOOP_BODY_FN]](i32 %[[LOOP_CNT:.*]], ptr %[[LOOP_BODY_ARG:.*]]) // CHECK: %[[GEP2:.*]] = getelementptr { ptr }, ptr 
%[[LOOP_BODY_ARG]], i32 0, i32 0 -@@ -48,6 +52,6 @@ +@@ -46,6 +50,6 @@ // CHECK: store i32 %[[VAL0:.*]], ptr %[[GEP3]], align 4 // CHECK: define void @[[FUNC_EMPTY_WSLOOP:.*]]() @@ -14745,10 +14820,10 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/omptarge +// CHECK: call void @__kmpc_for_static_loop_4u(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), ptr @[[LOOP_EMPTY_BODY_FN:.*]], ptr null, i32 9, i32 %[[NUM_THREADS:.*]], i32 0) // CHECK: define internal void @[[LOOP_EMPTY_BODY_FN]](i32 %[[LOOP_CNT:.*]]) -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/openmp-llvm.mlir llvm-project/mlir/test/Target/LLVMIR/openmp-llvm.mlir ---- llvm-project.orig/mlir/test/Target/LLVMIR/openmp-llvm.mlir 2024-06-12 10:43:15.400181266 -0500 -+++ llvm-project/mlir/test/Target/LLVMIR/openmp-llvm.mlir 2024-06-12 10:44:09.359614154 -0500 -@@ -699,7 +699,7 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/openmp-llvm.mlir llvm-project-aso/mlir/test/Target/LLVMIR/openmp-llvm.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/openmp-llvm.mlir 2024-11-14 15:28:41.946639261 -0600 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/openmp-llvm.mlir 2024-11-23 20:39:47.204175279 -0600 +@@ -700,7 +700,7 @@ // CHECK-LABEL: @simd_simple_multiple llvm.func @simd_simple_multiple(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) { omp.simd { @@ -14757,47 +14832,7 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/openmp-l %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32 // The form of the emitted IR is controlled by OpenMPIRBuilder and // tested there. Just check that the right metadata is added and collapsed -@@ -726,6 +726,7 @@ - llvm.store %3, %5 : f32, !llvm.ptr - omp.yield - } -+ omp.terminator - } - llvm.return - } -@@ -749,6 +750,7 @@ - llvm.store %3, %5 : f32, !llvm.ptr - omp.yield - } -+ omp.terminator - } - llvm.return - } -@@ -769,6 +771,7 @@ - llvm.store %3, %5 : f32, !llvm.ptr - omp.yield - } -+ omp.terminator - } - llvm.return - } -@@ -788,6 +791,7 @@ - llvm.store %3, %5 : f32, !llvm.ptr - omp.yield - } -+ omp.terminator - } - llvm.return - } -@@ -816,6 +820,7 @@ - llvm.store %arg2, %1 : i32, !llvm.ptr - omp.yield - } -+ omp.terminator - } - llvm.return - } -@@ -2178,7 +2183,7 @@ +@@ -2323,7 +2323,7 @@ // CHECK: [[SECTION3]]: // CHECK: br label %[[REGION3:[^ ,]*]] // CHECK: [[REGION3]]: @@ -14806,23 +14841,42 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/openmp-l %add = llvm.add %arg0, %arg1 : i32 // CHECK: store i32 %{{.*}}, ptr %{{.*}}, align 4 // CHECK: br label %{{.*}} -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/openmp-reduction.mlir llvm-project/mlir/test/Target/LLVMIR/openmp-reduction.mlir ---- llvm-project.orig/mlir/test/Target/LLVMIR/openmp-reduction.mlir 2024-06-12 10:43:15.400181266 -0500 -+++ llvm-project/mlir/test/Target/LLVMIR/openmp-reduction.mlir 2024-06-12 10:44:09.363614112 -0500 -@@ -565,8 +565,8 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/openmp-reduction.mlir llvm-project-aso/mlir/test/Target/LLVMIR/openmp-reduction.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/openmp-reduction.mlir 2024-11-06 08:35:35.855248041 -0600 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/openmp-reduction.mlir 2024-11-23 20:39:47.204175279 -0600 +@@ -559,7 +559,7 @@ // CHECK: define internal void @[[OUTLINED]] // Private reduction 
variable and its initialization. -// CHECK: %[[PRIVATE:[0-9]+]] = alloca i32 --// CHECK: store i32 0, ptr %[[PRIVATE]] +// CHECK: %[[PRIVATE:private_redvar]] = alloca i32 -+// CHECK-NEXT: store i32 0, ptr %[[PRIVATE]] + // CHECK: store i32 0, ptr %[[PRIVATE]] // Loop exit: - // CHECK: call void @__kmpc_barrier -diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir llvm-project/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir ---- llvm-project.orig/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir 2024-06-12 10:43:15.400181266 -0500 -+++ llvm-project/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir 2024-06-12 10:44:09.363614112 -0500 +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/openmp-target-use-device-nested.mlir llvm-project-aso/mlir/test/Target/LLVMIR/openmp-target-use-device-nested.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/openmp-target-use-device-nested.mlir 2024-10-18 17:40:33.940977568 -0500 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/openmp-target-use-device-nested.mlir 2024-11-23 20:39:47.204175279 -0600 +@@ -3,7 +3,7 @@ + // This tests check that target code nested inside a target data region which + // has only use_device_ptr mapping corectly generates code on the device pass. + +-// CHECK: define weak_odr protected void @__omp_offloading{{.*}}main_ ++// CHECK: define weak_odr protected {{.*}} void @__omp_offloading{{.*}}main_ + // CHECK-NEXT: entry: + // CHECK-NEXT: %[[VAL_3:.*]] = alloca ptr, align 8 + // CHECK-NEXT: store ptr %[[VAL_4:.*]], ptr %[[VAL_3]], align 8 +@@ -17,7 +17,7 @@ + // CHECK-NEXT: %[[VAL_13:.*]] = load ptr, ptr %[[VAL_11]], align 8 + // CHECK-NEXT: store i32 999, ptr %[[VAL_13]], align 4 + // CHECK-NEXT: br label %[[VAL_14:.*]] +-module attributes {omp.is_target_device = true } { ++module attributes {omp.is_target_device = true, llvm.target_triple = "amdgcn-amd-amdhsa"} { + llvm.func @_QQmain() attributes {fir.bindc_name = "main"} { + %0 = llvm.mlir.constant(1 : i64) : i64 + %a = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir llvm-project-aso/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir 2024-10-18 17:40:33.940977568 -0500 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir 2024-11-23 20:39:47.204175279 -0600 @@ -3,7 +3,7 @@ // This tests the fix for https://github.com/llvm/llvm-project/issues/84606 // We are only interested in ensuring that the -mlir-to-llmvir pass doesn't crash. 
@@ -14832,20 +14886,162 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/mlir/test/Target/LLVMIR/openmp-t llvm.func @_QQmain() attributes {fir.bindc_name = "main", omp.declare_target = #omp.declaretarget} { %0 = llvm.mlir.constant(0 : i32) : i32 %1 = llvm.mlir.constant(1 : i64) : i64 -diff -Naur -x .git -x '*.pyc' llvm-project.orig/offload/test/offloading/fortran/target_private.f90 llvm-project/offload/test/offloading/fortran/target_private.f90 ---- llvm-project.orig/offload/test/offloading/fortran/target_private.f90 1969-12-31 18:00:00.000000000 -0600 -+++ llvm-project/offload/test/offloading/fortran/target_private.f90 2024-06-12 10:44:09.363614112 -0500 -@@ -0,0 +1,29 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/mlir/test/Target/LLVMIR/openmp-todo.mlir llvm-project-aso/mlir/test/Target/LLVMIR/openmp-todo.mlir +--- llvm-project-aso-orig/mlir/test/Target/LLVMIR/openmp-todo.mlir 2024-11-14 15:28:41.946639261 -0600 ++++ llvm-project-aso/mlir/test/Target/LLVMIR/openmp-todo.mlir 2024-11-23 20:39:47.204175279 -0600 +@@ -66,10 +66,55 @@ + + // ----- + +-llvm.func @distribute(%lb : i32, %ub : i32, %step : i32) { +- // expected-error@below {{not yet implemented: omp.distribute}} ++llvm.func @distribute_allocate(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) { ++ // expected-error@below {{not yet implemented: Unhandled clause allocate in omp.distribute operation}} + // expected-error@below {{LLVM Translation failed for operation: omp.distribute}} +- omp.distribute { ++ omp.distribute allocate(%x : !llvm.ptr -> %x : !llvm.ptr) { ++ omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) { ++ omp.yield ++ } ++ } ++ llvm.return ++} ++ ++// ----- ++ ++llvm.func @distribute_dist_schedule(%lb : i32, %ub : i32, %step : i32, %x : i32) { ++ // expected-error@below {{not yet implemented: Unhandled clause dist_schedule in omp.distribute operation}} ++ // expected-error@below {{LLVM Translation failed for operation: omp.distribute}} ++ omp.distribute dist_schedule_static dist_schedule_chunk_size(%x : i32) { ++ omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) { ++ omp.yield ++ } ++ } ++ llvm.return ++} ++ ++// ----- ++ ++llvm.func @distribute_order(%lb : i32, %ub : i32, %step : i32) { ++ // expected-error@below {{not yet implemented: Unhandled clause order in omp.distribute operation}} ++ // expected-error@below {{LLVM Translation failed for operation: omp.distribute}} ++ omp.distribute order(concurrent) { ++ omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) { ++ omp.yield ++ } ++ } ++ llvm.return ++} ++ ++// ----- ++ ++omp.private {type = private} @x.privatizer : !llvm.ptr alloc { ++^bb0(%arg0: !llvm.ptr): ++ %0 = llvm.mlir.constant(1 : i32) : i32 ++ %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr ++ omp.yield(%1 : !llvm.ptr) ++} ++llvm.func @distribute_private(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) { ++ // expected-error@below {{not yet implemented: Unhandled clause privatization in omp.distribute operation}} ++ // expected-error@below {{LLVM Translation failed for operation: omp.distribute}} ++ omp.distribute private(@x.privatizer %x -> %arg0 : !llvm.ptr) { + omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) { + omp.yield + } +@@ -278,17 +323,6 @@ + + // ----- + +-llvm.func @target_if(%x : i1) { +- // expected-error@below {{not yet implemented: Unhandled clause if in omp.target operation}} +- // expected-error@below {{LLVM Translation failed for operation: omp.target}} +- omp.target if(%x) { +- omp.terminator +- } +- llvm.return +-} +- +-// ----- +- + 
omp.declare_reduction @add_f32 : f32 + init { + ^bb0(%arg: f32): +@@ -364,17 +398,6 @@ + + // ----- + +-llvm.func @target_thread_limit(%x : i32) { +- // expected-error@below {{not yet implemented: Unhandled clause thread_limit in omp.target operation}} +- // expected-error@below {{LLVM Translation failed for operation: omp.target}} +- omp.target thread_limit(%x : i32) { +- omp.terminator +- } +- llvm.return +-} +- +-// ----- +- + llvm.func @target_enter_data_depend(%x: !llvm.ptr) { + // expected-error@below {{not yet implemented: Unhandled clause depend in omp.target_enter_data operation}} + // expected-error@below {{LLVM Translation failed for operation: omp.target_enter_data}} +@@ -578,34 +601,6 @@ + omp.terminator + } + llvm.return +-} +- +-// ----- +- +-omp.declare_reduction @add_f32 : f32 +-init { +-^bb0(%arg: f32): +- %0 = llvm.mlir.constant(0.0 : f32) : f32 +- omp.yield (%0 : f32) +-} +-combiner { +-^bb1(%arg0: f32, %arg1: f32): +- %1 = llvm.fadd %arg0, %arg1 : f32 +- omp.yield (%1 : f32) +-} +-atomic { +-^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr): +- %2 = llvm.load %arg3 : !llvm.ptr -> f32 +- llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32 +- omp.yield +-} +-llvm.func @teams_reduction(%x : !llvm.ptr) { +- // expected-error@below {{not yet implemented: Unhandled clause reduction in omp.teams operation}} +- // expected-error@below {{LLVM Translation failed for operation: omp.teams}} +- omp.teams reduction(@add_f32 %x -> %prv : !llvm.ptr) { +- omp.terminator +- } +- llvm.return + } + + // ----- +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/offload/CMakeLists.txt llvm-project-aso/offload/CMakeLists.txt +--- llvm-project-aso-orig/offload/CMakeLists.txt 2024-11-14 15:28:41.950639246 -0600 ++++ llvm-project-aso/offload/CMakeLists.txt 2024-11-23 20:39:47.204175279 -0600 +@@ -101,9 +101,9 @@ + + # Check for flang + if (NOT MSVC) +- set(OPENMP_TEST_Fortran_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/flang) ++ set(OPENMP_TEST_Fortran_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/flang-new) + else() +- set(OPENMP_TEST_Fortran_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/flang.exe) ++ set(OPENMP_TEST_Fortran_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/flang-new.exe) + endif() + + # Set fortran test compiler if flang is found +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/offload/test/offloading/fortran/target_private.f90 llvm-project-aso/offload/test/offloading/fortran/target_private.f90 +--- llvm-project-aso-orig/offload/test/offloading/fortran/target_private.f90 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-aso/offload/test/offloading/fortran/target_private.f90 2024-11-23 20:39:47.204175279 -0600 +@@ -0,0 +1,23 @@ +! Basic offloading test with a target region -+! REQUIRES: flang -+! UNSUPPORTED: nvptx64-nvidia-cuda-LTO -+! UNSUPPORTED: aarch64-unknown-linux-gnu -+! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO -+! UNSUPPORTED: x86_64-pc-linux-gnu -+! UNSUPPORTED: x86_64-pc-linux-gnu-LTO -+ -+! RUN: %libomptarget-compile-fortran-generic -+! RUN: env LIBOMPTARGET_INFO=16 %libomptarget-run-generic 2>&1 | %fcheck-generic ++! REQUIRES: flang, amdgpu ++ ++! RUN: %libomptarget-compile-fortran-run-and-check-generic +program target_update + implicit none + integer :: x(1) @@ -14862,13 +15058,28 @@ diff -Naur -x .git -x '*.pyc' llvm-project.orig/offload/test/offloading/fortran/ + print *, "y =", y(1) + +end program target_update -+! CHECK: "PluginInterface" device {{[0-9]+}} info: Launching kernel {{.*}} ++ +! CHECK: x = 42 +! 
CHECK: y = 84 -diff -Naur -x .git -x '*.pyc' llvm-project.orig/openmp/runtime/src/CMakeLists.txt llvm-project/openmp/runtime/src/CMakeLists.txt ---- llvm-project.orig/openmp/runtime/src/CMakeLists.txt 2024-06-12 10:43:15.448180762 -0500 -+++ llvm-project/openmp/runtime/src/CMakeLists.txt 2024-06-12 10:44:09.363614112 -0500 -@@ -147,6 +147,11 @@ +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/openmp/CMakeLists.txt llvm-project-aso/openmp/CMakeLists.txt +--- llvm-project-aso-orig/openmp/CMakeLists.txt 2024-10-18 17:40:33.952977445 -0500 ++++ llvm-project-aso/openmp/CMakeLists.txt 2024-11-23 20:39:47.204175279 -0600 +@@ -79,9 +79,9 @@ + + # Check for flang + if (NOT MSVC) +- set(OPENMP_TEST_Fortran_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/flang) ++ set(OPENMP_TEST_Fortran_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/flang-new) + else() +- set(OPENMP_TEST_Fortran_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/flang.exe) ++ set(OPENMP_TEST_Fortran_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/flang-new.exe) + endif() + + # Set fortran test compiler if flang is found +diff -Naur -x .git -x '*.pyc' llvm-project-aso-orig/openmp/runtime/src/CMakeLists.txt llvm-project-aso/openmp/runtime/src/CMakeLists.txt +--- llvm-project-aso-orig/openmp/runtime/src/CMakeLists.txt 2024-11-23 20:25:27.515272748 -0600 ++++ llvm-project-aso/openmp/runtime/src/CMakeLists.txt 2024-11-23 20:39:47.204175279 -0600 +@@ -153,6 +153,11 @@ set_source_files_properties(${LIBOMP_CXXFILES} PROPERTIES COMPILE_FLAGS "${LIBOMP_CONFIGURED_CXXFLAGS}") set_source_files_properties(${LIBOMP_ASMFILES} ${LIBOMP_GNUASMFILES} PROPERTIES COMPILE_FLAGS "${LIBOMP_CONFIGURED_ASMFLAGS}") diff --git a/trunk/patches/ATD_full.patch b/trunk/patches/ATD_full.patch index 79fd32dca..173a45fd3 100644 --- a/trunk/patches/ATD_full.patch +++ b/trunk/patches/ATD_full.patch @@ -1,6 +1,6 @@ -diff -Naur -x .git llvm-project.upstream/clang/include/clang/Basic/DiagnosticDriverKinds.td llvm-project/clang/include/clang/Basic/DiagnosticDriverKinds.td ---- llvm-project.upstream/clang/include/clang/Basic/DiagnosticDriverKinds.td 2024-11-19 12:55:58.281826424 -0500 -+++ llvm-project/clang/include/clang/Basic/DiagnosticDriverKinds.td 2024-11-19 12:49:04.556151210 -0500 +diff -Naur -x .git llvm-project-trunk/clang/include/clang/Basic/DiagnosticDriverKinds.td llvm-project-trunk-atd/clang/include/clang/Basic/DiagnosticDriverKinds.td +--- llvm-project-trunk/clang/include/clang/Basic/DiagnosticDriverKinds.td 2024-11-23 20:25:45.999206301 -0600 ++++ llvm-project-trunk-atd/clang/include/clang/Basic/DiagnosticDriverKinds.td 2024-11-23 20:26:13.503107411 -0600 @@ -146,9 +146,6 @@ def warn_drv_unsupported_openmp_library : Warning< "the library '%0=%1' is not supported, OpenMP will not be enabled">, @@ -11,9 +11,9 @@ diff -Naur -x .git llvm-project.upstream/clang/include/clang/Basic/DiagnosticDri def err_drv_invalid_thread_model_for_target : Error< "invalid thread model '%0' in '%1' for this target">; -diff -Naur -x .git llvm-project.upstream/clang/include/clang/Basic/DiagnosticGroups.td llvm-project/clang/include/clang/Basic/DiagnosticGroups.td ---- llvm-project.upstream/clang/include/clang/Basic/DiagnosticGroups.td 2024-11-19 12:54:59.881016873 -0500 -+++ llvm-project/clang/include/clang/Basic/DiagnosticGroups.td 2024-11-19 12:49:04.556151210 -0500 +diff -Naur -x .git llvm-project-trunk/clang/include/clang/Basic/DiagnosticGroups.td llvm-project-trunk-atd/clang/include/clang/Basic/DiagnosticGroups.td +--- llvm-project-trunk/clang/include/clang/Basic/DiagnosticGroups.td 2024-11-23 
20:30:46.590124482 -0600 ++++ llvm-project-trunk-atd/clang/include/clang/Basic/DiagnosticGroups.td 2024-10-29 11:08:03.513484408 -0500 @@ -1584,7 +1584,3 @@ // Warnings about using the non-standard extension having an explicit specialization // with a storage class specifier. @@ -22,10 +22,10 @@ diff -Naur -x .git llvm-project.upstream/clang/include/clang/Basic/DiagnosticGro -// A warning for options that enable a feature that is not yet complete -def ExperimentalOption : DiagGroup<"experimental-option">; - -diff -Naur -x .git llvm-project.upstream/clang/include/clang/Driver/Options.td llvm-project/clang/include/clang/Driver/Options.td ---- llvm-project.upstream/clang/include/clang/Driver/Options.td 2024-11-19 12:55:58.290826394 -0500 -+++ llvm-project/clang/include/clang/Driver/Options.td 2024-11-19 12:49:04.571151163 -0500 -@@ -6112,7 +6112,7 @@ +diff -Naur -x .git llvm-project-trunk/clang/include/clang/Driver/Options.td llvm-project-trunk-atd/clang/include/clang/Driver/Options.td +--- llvm-project-trunk/clang/include/clang/Driver/Options.td 2024-11-23 20:30:46.598124454 -0600 ++++ llvm-project-trunk-atd/clang/include/clang/Driver/Options.td 2024-11-23 20:26:13.515107367 -0600 +@@ -6118,7 +6118,7 @@ def _sysroot : Separate<["--"], "sysroot">, Alias<_sysroot_EQ>; //===----------------------------------------------------------------------===// @@ -34,7 +34,7 @@ diff -Naur -x .git llvm-project.upstream/clang/include/clang/Driver/Options.td l //===----------------------------------------------------------------------===// let Visibility = [ClangOption, FlangOption] in { -@@ -6128,7 +6128,7 @@ +@@ -6134,7 +6134,7 @@ } // let Vis = [Default, FlangOption] //===----------------------------------------------------------------------===// @@ -43,7 +43,7 @@ diff -Naur -x .git llvm-project.upstream/clang/include/clang/Driver/Options.td l //===----------------------------------------------------------------------===// let Flags = [TargetSpecific] in { let Visibility = [ClangOption, FlangOption] in { -@@ -6876,6 +6876,7 @@ +@@ -6882,6 +6882,7 @@ defm logical_abbreviations : OptInFC1FFlag<"logical-abbreviations", "Enable logical abbreviations">; defm implicit_none : OptInFC1FFlag<"implicit-none", "No implicit typing allowed unless overridden by IMPLICIT statements">; defm underscoring : OptInFC1FFlag<"underscoring", "Appends one trailing underscore to external names">; @@ -51,7 +51,7 @@ diff -Naur -x .git llvm-project.upstream/clang/include/clang/Driver/Options.td l defm ppc_native_vec_elem_order: BoolOptionWithoutMarshalling<"f", "ppc-native-vector-element-order", PosFlag, NegFlag>; -@@ -6892,6 +6893,10 @@ +@@ -6898,6 +6899,10 @@ def fhermetic_module_files : Flag<["-"], "fhermetic-module-files">, Group, HelpText<"Emit hermetic module files (no nested USE association)">; @@ -62,7 +62,7 @@ diff -Naur -x .git llvm-project.upstream/clang/include/clang/Driver/Options.td l } // let Visibility = [FC1Option, FlangOption] def J : JoinedOrSeparate<["-"], "J">, -@@ -8266,7 +8271,7 @@ +@@ -8272,7 +8277,7 @@ // CUDA Options //===----------------------------------------------------------------------===// @@ -71,10 +71,10 @@ diff -Naur -x .git llvm-project.upstream/clang/include/clang/Driver/Options.td l def fcuda_is_device : Flag<["-"], "fcuda-is-device">, HelpText<"Generate code for CUDA device">, -diff -Naur -x .git llvm-project.upstream/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp ---- llvm-project.upstream/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp 2024-11-19 
12:55:58.343826221 -0500 -+++ llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp 2024-11-19 12:49:04.656150898 -0500 -@@ -745,14 +745,14 @@ +diff -Naur -x .git llvm-project-trunk/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp llvm-project-trunk-atd/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +--- llvm-project-trunk/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp 2024-11-23 20:25:46.067206057 -0600 ++++ llvm-project-trunk-atd/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp 2024-11-23 20:26:13.599107065 -0600 +@@ -744,14 +744,14 @@ void CGOpenMPRuntimeGPU::emitKernelInit(const OMPExecutableDirective &D, CodeGenFunction &CGF, EntryFunctionState &EST, bool IsSPMD) { @@ -95,7 +95,7 @@ diff -Naur -x .git llvm-project.upstream/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cp if (!IsSPMD) emitGenericVarsProlog(CGF, EST.Loc); } -@@ -1659,7 +1659,6 @@ +@@ -1658,7 +1658,6 @@ return; bool ParallelReduction = isOpenMPParallelDirective(Options.ReductionKind); @@ -103,7 +103,7 @@ diff -Naur -x .git llvm-project.upstream/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cp bool TeamsReduction = isOpenMPTeamsDirective(Options.ReductionKind); ASTContext &C = CGM.getContext(); -@@ -1756,7 +1755,7 @@ +@@ -1755,7 +1754,7 @@ llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createReductionsGPU( OmpLoc, AllocaIP, CodeGenIP, ReductionInfos, false, TeamsReduction, @@ -112,10 +112,10 @@ diff -Naur -x .git llvm-project.upstream/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cp CGF.getTarget().getGridValue(), C.getLangOpts().OpenMPCUDAReductionBufNum, RTLoc); assert(AfterIP && "unexpected error creating GPU reductions"); -diff -Naur -x .git llvm-project.upstream/clang/lib/Driver/Driver.cpp llvm-project/clang/lib/Driver/Driver.cpp ---- llvm-project.upstream/clang/lib/Driver/Driver.cpp 2024-11-19 12:55:58.353826188 -0500 -+++ llvm-project/clang/lib/Driver/Driver.cpp 2024-11-19 12:49:04.671150852 -0500 -@@ -2029,7 +2029,7 @@ +diff -Naur -x .git llvm-project-trunk/clang/lib/Driver/Driver.cpp llvm-project-trunk-atd/clang/lib/Driver/Driver.cpp +--- llvm-project-trunk/clang/lib/Driver/Driver.cpp 2024-11-23 20:30:46.622124367 -0600 ++++ llvm-project-trunk-atd/clang/lib/Driver/Driver.cpp 2024-11-23 20:26:13.615107008 -0600 +@@ -2028,7 +2028,7 @@ void Driver::PrintVersion(const Compilation &C, raw_ostream &OS) const { if (IsFlangMode()) { @@ -124,10 +124,10 @@ diff -Naur -x .git llvm-project.upstream/clang/lib/Driver/Driver.cpp llvm-projec } else { // FIXME: The following handlers should use a callback mechanism, we don't // know what the client would like to do. 
-diff -Naur -x .git llvm-project.upstream/clang/lib/Driver/ToolChain.cpp llvm-project/clang/lib/Driver/ToolChain.cpp ---- llvm-project.upstream/clang/lib/Driver/ToolChain.cpp 2024-11-19 12:55:58.354826185 -0500 -+++ llvm-project/clang/lib/Driver/ToolChain.cpp 2024-11-19 12:49:04.672150848 -0500 -@@ -416,9 +416,6 @@ +diff -Naur -x .git llvm-project-trunk/clang/lib/Driver/ToolChain.cpp llvm-project-trunk-atd/clang/lib/Driver/ToolChain.cpp +--- llvm-project-trunk/clang/lib/Driver/ToolChain.cpp 2024-11-23 20:25:46.079206014 -0600 ++++ llvm-project-trunk-atd/clang/lib/Driver/ToolChain.cpp 2024-11-23 20:26:13.615107008 -0600 +@@ -414,9 +414,6 @@ {"cl", "--driver-mode=cl"}, {"++", "--driver-mode=g++"}, {"flang", "--driver-mode=flang"}, @@ -137,10 +137,10 @@ diff -Naur -x .git llvm-project.upstream/clang/lib/Driver/ToolChain.cpp llvm-pro {"clang-dxc", "--driver-mode=dxc"}, }; -diff -Naur -x .git llvm-project.upstream/clang/lib/Driver/ToolChains/Clang.cpp llvm-project/clang/lib/Driver/ToolChains/Clang.cpp ---- llvm-project.upstream/clang/lib/Driver/ToolChains/Clang.cpp 2024-11-19 12:55:58.357826175 -0500 -+++ llvm-project/clang/lib/Driver/ToolChains/Clang.cpp 2024-11-19 12:49:04.675150839 -0500 -@@ -8897,7 +8897,9 @@ +diff -Naur -x .git llvm-project-trunk/clang/lib/Driver/ToolChains/Clang.cpp llvm-project-trunk-atd/clang/lib/Driver/ToolChains/Clang.cpp +--- llvm-project-trunk/clang/lib/Driver/ToolChains/Clang.cpp 2024-11-23 20:25:46.083206000 -0600 ++++ llvm-project-trunk-atd/clang/lib/Driver/ToolChains/Clang.cpp 2024-11-23 20:26:13.619106993 -0600 +@@ -8902,7 +8902,9 @@ assert(Input.isFilename() && "Invalid input."); CmdArgs.push_back(Input.getFilename()); @@ -151,9 +151,9 @@ diff -Naur -x .git llvm-project.upstream/clang/lib/Driver/ToolChains/Clang.cpp l if (D.CC1Main && !D.CCGenDiagnostics) { // Invoke cc1as directly in this process. 
C.addCommand(std::make_unique( -diff -Naur -x .git llvm-project.upstream/clang/lib/Driver/ToolChains/Flang.cpp llvm-project/clang/lib/Driver/ToolChains/Flang.cpp ---- llvm-project.upstream/clang/lib/Driver/ToolChains/Flang.cpp 2024-11-19 12:55:00.012016447 -0500 -+++ llvm-project/clang/lib/Driver/ToolChains/Flang.cpp 2024-11-19 12:49:04.676150836 -0500 +diff -Naur -x .git llvm-project-trunk/clang/lib/Driver/ToolChains/Flang.cpp llvm-project-trunk-atd/clang/lib/Driver/ToolChains/Flang.cpp +--- llvm-project-trunk/clang/lib/Driver/ToolChains/Flang.cpp 2024-11-23 20:30:46.626124353 -0600 ++++ llvm-project-trunk-atd/clang/lib/Driver/ToolChains/Flang.cpp 2024-11-14 15:29:46.794383844 -0600 @@ -120,7 +120,9 @@ options::OPT_fintrinsic_modules_path, options::OPT_pedantic, options::OPT_std_EQ, options::OPT_W_Joined, @@ -202,9 +202,9 @@ diff -Naur -x .git llvm-project.upstream/clang/lib/Driver/ToolChains/Flang.cpp l +Flang::Flang(const ToolChain &TC) : Tool("flang-new", "flang frontend", TC) {} Flang::~Flang() {} -diff -Naur -x .git llvm-project.upstream/clang/test/ClangScanDeps/multiple-commands.c llvm-project/clang/test/ClangScanDeps/multiple-commands.c ---- llvm-project.upstream/clang/test/ClangScanDeps/multiple-commands.c 2022-11-28 13:39:57.977654739 -0500 -+++ llvm-project/clang/test/ClangScanDeps/multiple-commands.c 2024-05-13 09:10:50.667094940 -0400 +diff -Naur -x .git llvm-project-trunk/clang/test/ClangScanDeps/multiple-commands.c llvm-project-trunk-atd/clang/test/ClangScanDeps/multiple-commands.c +--- llvm-project-trunk/clang/test/ClangScanDeps/multiple-commands.c 2024-08-27 20:04:03.984046081 -0500 ++++ llvm-project-trunk-atd/clang/test/ClangScanDeps/multiple-commands.c 2024-08-28 08:37:25.104601402 -0500 @@ -133,7 +133,7 @@ // CHECK-NEXT: "{{.*}}tu_save_temps_module.o" // CHECK: "{{.*}}tu_save_temps_module.s" @@ -214,9 +214,9 @@ diff -Naur -x .git llvm-project.upstream/clang/test/ClangScanDeps/multiple-comma // CHECK: "input-file": "[[PREFIX]]{{.}}tu_save_temps_module.c" // CHECK-NEXT: } // CHECK-NEXT: ] -diff -Naur -x .git llvm-project.upstream/clang/test/Driver/flang/flang.f90 llvm-project/clang/test/Driver/flang/flang.f90 ---- llvm-project.upstream/clang/test/Driver/flang/flang.f90 2024-11-19 12:55:00.400015185 -0500 -+++ llvm-project/clang/test/Driver/flang/flang.f90 2024-02-19 15:32:04.212924968 -0500 +diff -Naur -x .git llvm-project-trunk/clang/test/Driver/flang/flang.f90 llvm-project-trunk-atd/clang/test/Driver/flang/flang.f90 +--- llvm-project-trunk/clang/test/Driver/flang/flang.f90 2024-10-18 17:40:44.568868604 -0500 ++++ llvm-project-trunk-atd/clang/test/Driver/flang/flang.f90 2024-08-27 20:12:44.001839780 -0500 @@ -13,7 +13,7 @@ ! * (no type specified, resulting in an object file) @@ -226,9 +226,9 @@ diff -Naur -x .git llvm-project.upstream/clang/test/Driver/flang/flang.f90 llvm- ! Check that f90 files are not treated as "previously preprocessed" ! ... in --driver-mode=flang. 
-diff -Naur -x .git llvm-project.upstream/clang/test/Driver/flang/flang_ucase.F90 llvm-project/clang/test/Driver/flang/flang_ucase.F90 ---- llvm-project.upstream/clang/test/Driver/flang/flang_ucase.F90 2024-11-19 12:55:00.400015185 -0500 -+++ llvm-project/clang/test/Driver/flang/flang_ucase.F90 2024-02-19 15:32:04.212924968 -0500 +diff -Naur -x .git llvm-project-trunk/clang/test/Driver/flang/flang_ucase.F90 llvm-project-trunk-atd/clang/test/Driver/flang/flang_ucase.F90 +--- llvm-project-trunk/clang/test/Driver/flang/flang_ucase.F90 2024-10-18 17:40:44.568868604 -0500 ++++ llvm-project-trunk-atd/clang/test/Driver/flang/flang_ucase.F90 2024-08-27 20:12:44.001839780 -0500 @@ -13,7 +13,7 @@ ! * (no type specified, resulting in an object file) @@ -238,9 +238,9 @@ diff -Naur -x .git llvm-project.upstream/clang/test/Driver/flang/flang_ucase.F90 ! Check that f90 files are not treated as "previously preprocessed" ! ... in --driver-mode=flang. -diff -Naur -x .git llvm-project.upstream/clang/test/Driver/flang/multiple-inputs.f90 llvm-project/clang/test/Driver/flang/multiple-inputs.f90 ---- llvm-project.upstream/clang/test/Driver/flang/multiple-inputs.f90 2024-11-19 12:55:00.400015185 -0500 -+++ llvm-project/clang/test/Driver/flang/multiple-inputs.f90 2024-02-19 15:32:04.212924968 -0500 +diff -Naur -x .git llvm-project-trunk/clang/test/Driver/flang/multiple-inputs.f90 llvm-project-trunk-atd/clang/test/Driver/flang/multiple-inputs.f90 +--- llvm-project-trunk/clang/test/Driver/flang/multiple-inputs.f90 2024-10-18 17:40:44.568868604 -0500 ++++ llvm-project-trunk-atd/clang/test/Driver/flang/multiple-inputs.f90 2024-08-27 20:12:44.001839780 -0500 @@ -1,7 +1,7 @@ ! Check that flang driver can handle multiple inputs at once. @@ -251,9 +251,9 @@ diff -Naur -x .git llvm-project.upstream/clang/test/Driver/flang/multiple-inputs -! CHECK-SYNTAX-ONLY-LABEL: "{{[^"]*}}flang{{[^"/]*}}" "-fc1" +! CHECK-SYNTAX-ONLY-LABEL: "{{[^"]*}}flang-new{{[^"/]*}}" "-fc1" ! CHECK-SYNTAX-ONLY: "{{[^"]*}}/Inputs/two.f90" -diff -Naur -x .git llvm-project.upstream/clang/test/Driver/flang/multiple-inputs-mixed.f90 llvm-project/clang/test/Driver/flang/multiple-inputs-mixed.f90 ---- llvm-project.upstream/clang/test/Driver/flang/multiple-inputs-mixed.f90 2024-11-19 12:55:00.400015185 -0500 -+++ llvm-project/clang/test/Driver/flang/multiple-inputs-mixed.f90 2024-02-19 15:32:04.212924968 -0500 +diff -Naur -x .git llvm-project-trunk/clang/test/Driver/flang/multiple-inputs-mixed.f90 llvm-project-trunk-atd/clang/test/Driver/flang/multiple-inputs-mixed.f90 +--- llvm-project-trunk/clang/test/Driver/flang/multiple-inputs-mixed.f90 2024-10-18 17:40:44.568868604 -0500 ++++ llvm-project-trunk-atd/clang/test/Driver/flang/multiple-inputs-mixed.f90 2024-08-27 20:12:44.001839780 -0500 @@ -1,7 +1,7 @@ ! Check that flang can handle mixed C and fortran inputs. @@ -263,9 +263,9 @@ diff -Naur -x .git llvm-project.upstream/clang/test/Driver/flang/multiple-inputs ! CHECK-SYNTAX-ONLY: "{{[^"]*}}/Inputs/one.f90" ! CHECK-SYNTAX-ONLY-LABEL: "{{[^"]*}}clang{{[^"/]*}}" "-cc1" ! 
CHECK-SYNTAX-ONLY: "{{[^"]*}}/Inputs/other.c" -diff -Naur -x .git llvm-project.upstream/clang/test/Driver/hip-target-id.hip llvm-project/clang/test/Driver/hip-target-id.hip ---- llvm-project.upstream/clang/test/Driver/hip-target-id.hip 2024-07-09 19:05:25.426802216 -0400 -+++ llvm-project/clang/test/Driver/hip-target-id.hip 2024-06-17 09:19:11.129662101 -0400 +diff -Naur -x .git llvm-project-trunk/clang/test/Driver/hip-target-id.hip llvm-project-trunk-atd/clang/test/Driver/hip-target-id.hip +--- llvm-project-trunk/clang/test/Driver/hip-target-id.hip 2024-08-27 20:04:04.692043863 -0500 ++++ llvm-project-trunk-atd/clang/test/Driver/hip-target-id.hip 2024-08-28 08:37:25.104601402 -0500 @@ -26,7 +26,7 @@ // CHECK-SAME: "-target-feature" "+sramecc" // CHECK-SAME: "-target-feature" "+xnack" @@ -275,9 +275,9 @@ diff -Naur -x .git llvm-project.upstream/clang/test/Driver/hip-target-id.hip llv // TMP-SAME: "-target-cpu" "gfx908" // TMP-SAME: "-target-feature" "+sramecc" // TMP-SAME: "-target-feature" "+xnack" -diff -Naur -x .git llvm-project.upstream/clang/test/OpenMP/irbuilder_nested_parallel_for.c llvm-project/clang/test/OpenMP/irbuilder_nested_parallel_for.c ---- llvm-project.upstream/clang/test/OpenMP/irbuilder_nested_parallel_for.c 2024-08-19 13:39:47.298756651 -0400 -+++ llvm-project/clang/test/OpenMP/irbuilder_nested_parallel_for.c 2024-08-20 14:56:30.867033548 -0400 +diff -Naur -x .git llvm-project-trunk/clang/test/OpenMP/irbuilder_nested_parallel_for.c llvm-project-trunk-atd/clang/test/OpenMP/irbuilder_nested_parallel_for.c +--- llvm-project-trunk/clang/test/OpenMP/irbuilder_nested_parallel_for.c 2024-08-27 20:04:04.880043275 -0500 ++++ llvm-project-trunk-atd/clang/test/OpenMP/irbuilder_nested_parallel_for.c 2024-08-28 08:37:25.104601402 -0500 @@ -120,14 +120,14 @@ // CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0) // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4 @@ -3706,9 +3706,9 @@ diff -Naur -x .git llvm-project.upstream/clang/test/OpenMP/irbuilder_nested_para +// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META436]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG439]] // -diff -Naur -x .git llvm-project.upstream/flang/docs/DoConcurrentConversionToOpenMP.md llvm-project/flang/docs/DoConcurrentConversionToOpenMP.md ---- llvm-project.upstream/flang/docs/DoConcurrentConversionToOpenMP.md 1969-12-31 19:00:00.000000000 -0500 -+++ llvm-project/flang/docs/DoConcurrentConversionToOpenMP.md 2024-08-12 11:55:29.868279345 -0400 +diff -Naur -x .git llvm-project-trunk/flang/docs/DoConcurrentConversionToOpenMP.md llvm-project-trunk-atd/flang/docs/DoConcurrentConversionToOpenMP.md +--- llvm-project-trunk/flang/docs/DoConcurrentConversionToOpenMP.md 1969-12-31 18:00:00.000000000 -0600 ++++ llvm-project-trunk-atd/flang/docs/DoConcurrentConversionToOpenMP.md 2024-08-28 08:37:25.104601402 -0500 @@ -0,0 +1,332 @@ +