From 5dedac0b7d70c71569973a6204303a95085be4b4 Mon Sep 17 00:00:00 2001
From: Weiqun Zhang <WeiqunZhang@lbl.gov>
Date: Tue, 13 Aug 2024 12:29:25 -0500
Subject: [PATCH 1/9] GPU Device Variable on Intel GPUs (#4056)

This adds GPU device variable support on Intel GPUs using Intel oneAPI
compiler's experimental feature.

To make the user interface consistent, we have add a macro
AMREX_DEVICE_GLOBAL_VARIABLE. For example, the user can define a device
variable as follows for all GPUs and CPUs.

AMREX_DEVICE_GLOBAL_VARIABLE(amrex::Real, my_dg1); // amrex::Real
my_dg1;
AMREX_DEVICE_GLOBAL_VARIABLE(amrex::Real, 4, my_dg2); // amrex::Real
my_dg2[4];

Below are their declarations.

    extern AMREX_DEVICE_GLOBAL_VARIABLE(amrex::Real, my_dg1);
    extern AMREX_DEVICE_GLOBAL_VARIABLE(amrex::Real, 4, my_dg2);

GPU and CPU kernels can use the global variables if they see the
declarations.

We have also added two functions from copying data from and to device
global variables.

//! Copy `nbytes` bytes from host to device global variable. `offset` is
the
    //! offset in bytes from the start of the device global variable.
    template <typename T>
void memcpy_from_host_to_device_global_async (T& dg, const void* src,
                                                  std::size_t nbytes,
std::size_t offset = 0)

//! Copy `nbytes` bytes from device global variable to host. `offset` is
the
    //! offset in bytes from the start of the device global variable.
    template <typename T>
void memcpy_from_device_global_to_host_async (void* dst, T const& dg,
                                                  std::size_t nbytes,
std::size_t offset = 0)
---
 .github/workflows/intel.yml        | 12 ++++++--
 Src/Base/AMReX_GpuDevice.H         | 47 ++++++++++++++++++++++++++++++
 Src/Base/AMReX_GpuLaunch.nolint.H  |  7 ++---
 Src/Base/AMReX_GpuQualifiers.H     | 18 ++++++++++++
 Tests/CMakeLists.txt               |  3 +-
 Tests/DeviceGlobal/CMakeLists.txt  | 15 ++++++++++
 Tests/DeviceGlobal/GNUmakefile     | 23 +++++++++++++++
 Tests/DeviceGlobal/Make.package    |  2 ++
 Tests/DeviceGlobal/global_vars.H   |  7 +++++
 Tests/DeviceGlobal/global_vars.cpp |  7 +++++
 Tests/DeviceGlobal/init.cpp        | 27 +++++++++++++++++
 Tests/DeviceGlobal/main.cpp        | 20 +++++++++++++
 Tests/DeviceGlobal/work.cpp        | 40 +++++++++++++++++++++++++
 Tools/GNUMake/comps/dpcpp.mak      |  8 +++++
 14 files changed, 229 insertions(+), 7 deletions(-)
 create mode 100644 Tests/DeviceGlobal/CMakeLists.txt
 create mode 100644 Tests/DeviceGlobal/GNUmakefile
 create mode 100644 Tests/DeviceGlobal/Make.package
 create mode 100644 Tests/DeviceGlobal/global_vars.H
 create mode 100644 Tests/DeviceGlobal/global_vars.cpp
 create mode 100644 Tests/DeviceGlobal/init.cpp
 create mode 100644 Tests/DeviceGlobal/main.cpp
 create mode 100644 Tests/DeviceGlobal/work.cpp
diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml
index 168670eda3..4d1612d0ec 100644
--- a/.github/workflows/intel.yml
+++ b/.github/workflows/intel.yml
@@ -24,7 +24,11 @@ jobs:
         restore-keys: |
              ccache-${{ github.workflow }}-${{ github.job }}-git-
     - name: Build & Install
-      env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor"}
+      # /tmp/icpx-2d34de0e47/global_vars-header-4390fb.h:25:36: error: zero size arrays are an extension [-Werror,-Wzero-length-array]
+      #    25 | const char* const kernel_names[] = {
+      #       |                                    ^
+      # 1 error generated.
+      env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-zero-length-array"}
       run: |
         export CCACHE_COMPRESS=1
         export CCACHE_COMPRESSLEVEL=10
@@ -68,7 +72,11 @@ jobs:
         restore-keys: |
              ccache-${{ github.workflow }}-${{ github.job }}-git-
     - name: Build & Install
-      env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor"}
+      # /tmp/icpx-2d34de0e47/global_vars-header-4390fb.h:25:36: error: zero size arrays are an extension [-Werror,-Wzero-length-array]
+      #    25 | const char* const kernel_names[] = {
+      #       |                                    ^
+      # 1 error generated.
+      env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-zero-length-array"}
       run: |
         export CCACHE_COMPRESS=1
         export CCACHE_COMPRESSLEVEL=10
diff --git a/Src/Base/AMReX_GpuDevice.H b/Src/Base/AMReX_GpuDevice.H
index 7c17c918a7..a7aef5a924 100644
--- a/Src/Base/AMReX_GpuDevice.H
+++ b/Src/Base/AMReX_GpuDevice.H
@@ -14,6 +14,7 @@
 #include <algorithm>
 #include <array>
 #include <cstdlib>
+#include <cstring>
 #include <memory>
 
 #define AMREX_GPU_MAX_STREAMS 8
@@ -318,6 +319,52 @@ dtod_memcpy (void* p_d_dst, const void* p_d_src, const std::size_t sz) noexcept
 void hypreSynchronize ();
 #endif
 
+//! Copy `nbytes` bytes from host to device global variable. `offset` is the
+//! offset in bytes from the start of the device global variable.
+template <typename T>
+void memcpy_from_host_to_device_global_async (T& dg, const void* src,
+                                              std::size_t nbytes,
+                                              std::size_t offset = 0)
+{
+#if defined(AMREX_USE_CUDA)
+    AMREX_CUDA_SAFE_CALL(cudaMemcpyToSymbolAsync(dg, src, nbytes, offset,
+                                                 cudaMemcpyHostToDevice,
+                                                 Device::gpuStream()));
+#elif defined(AMREX_USE_HIP)
+    AMREX_HIP_SAFE_CALL(hipMemcpyToSymbolAsync(dg, src, nbytes, offset,
+                                               hipMemcpyHostToDevice,
+                                               Device::gpuStream()));
+#elif defined(AMREX_USE_SYCL)
+    Device::streamQueue().memcpy(dg, src, nbytes, offset);
+#else
+    auto* p = (char*)(&dg);
+    std::memcpy(p+offset, src, nbytes);
+#endif
+}
+
+//! Copy `nbytes` bytes from device global variable to host. `offset` is the
+//! offset in bytes from the start of the device global variable.
+template <typename T>
+void memcpy_from_device_global_to_host_async (void* dst, T const& dg,
+                                              std::size_t nbytes,
+                                              std::size_t offset = 0)
+{
+#if defined(AMREX_USE_CUDA)
+    AMREX_CUDA_SAFE_CALL(cudaMemcpyFromSymbolAsync(dst, dg, nbytes, offset,
+                                                   cudaMemcpyDeviceToHost,
+                                                   Device::gpuStream()));
+#elif defined(AMREX_USE_HIP)
+    AMREX_HIP_SAFE_CALL(hipMemcpyFromSymbolAsync(dst, dg, nbytes, offset,
+                                                  hipMemcpyDeviceToHost,
+                                                  Device::gpuStream()));
+#elif defined(AMREX_USE_SYCL)
+    Device::streamQueue().memcpy(dst, dg, nbytes, offset);
+#else
+    auto const* p = (char const*)(&dg);
+    std::memcpy(dst, p+offset, nbytes);
+#endif
+}
+
 }
 
 #endif
diff --git a/Src/Base/AMReX_GpuLaunch.nolint.H b/Src/Base/AMReX_GpuLaunch.nolint.H
index c7df173751..bb1bbb2453 100644
--- a/Src/Base/AMReX_GpuLaunch.nolint.H
+++ b/Src/Base/AMReX_GpuLaunch.nolint.H
@@ -1,9 +1,8 @@
 // Do not include this header anywhere other than AMReX_GpuLaunch.H.
 // The purpose of this file is to avoid clang-tidy.
 
-#define AMREX_WRONG_NUM_ARGS(...) static_assert(false,"Wrong number of arguments to macro")
-#define AMREX_GET_MACRO(_1,_2,_3,_4,_5,_6,_7,_8,_9,NAME,...) NAME
-#define AMREX_LAUNCH_DEVICE_LAMBDA(...) AMREX_GET_MACRO(__VA_ARGS__,\
+#define AMREX_GET_LAUNCH_MACRO(_1,_2,_3,_4,_5,_6,_7,_8,_9,NAME,...) NAME
+#define AMREX_LAUNCH_DEVICE_LAMBDA(...) AMREX_GET_LAUNCH_MACRO(__VA_ARGS__,\
                                                         AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_3, \
                                                         AMREX_WRONG_NUM_ARGS, \
                                                         AMREX_WRONG_NUM_ARGS, \
@@ -14,7 +13,7 @@
                                                         AMREX_WRONG_NUM_ARGS, \
                                                         AMREX_WRONG_NUM_ARGS)(__VA_ARGS__)
 
-#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA(...) AMREX_GET_MACRO(__VA_ARGS__,\
+#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA(...) AMREX_GET_LAUNCH_MACRO(__VA_ARGS__,\
                                                         AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3, \
                                                         AMREX_WRONG_NUM_ARGS, \
                                                         AMREX_WRONG_NUM_ARGS, \
diff --git a/Src/Base/AMReX_GpuQualifiers.H b/Src/Base/AMReX_GpuQualifiers.H
index 4fba23a849..3e10bec54d 100644
--- a/Src/Base/AMReX_GpuQualifiers.H
+++ b/Src/Base/AMReX_GpuQualifiers.H
@@ -64,4 +64,22 @@
 # include <sycl/sycl.hpp>
 #endif
 
+#define AMREX_WRONG_NUM_ARGS(...) static_assert(false,"Wrong number of arguments to macro")
+
+#define AMREX_GET_DGV_MACRO(_1,_2,_3,NAME,...) NAME
+#define AMREX_DEVICE_GLOBAL_VARIABLE(...) AMREX_GET_DGV_MACRO(__VA_ARGS__,\
+                                              AMREX_DGVARR, AMREX_DGV,\
+                                              AMREX_WRONG_NUM_ARGS)(__VA_ARGS__)
+
+#ifdef AMREX_USE_SYCL
+# define AMREX_DGV(type,name) SYCL_EXTERNAL sycl::ext::oneapi::experimental::device_global<type> name
+# define AMREX_DGVARR(type,num,name) SYCL_EXTERNAL sycl::ext::oneapi::experimental::device_global<type[num]> name
+#elif defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
+# define AMREX_DGV(type,name) __device__ type name
+# define AMREX_DGVARR(type,num,name) __device__ type name[num]
+#else
+# define AMREX_DGV(type,name) type name
+# define AMREX_DGVARR(type,num,name) type name[num]
+#endif
+
 #endif
diff --git a/Tests/CMakeLists.txt b/Tests/CMakeLists.txt
index 99d72633b6..01f187b664 100644
--- a/Tests/CMakeLists.txt
+++ b/Tests/CMakeLists.txt
@@ -121,7 +121,8 @@ else()
    #
    # List of subdirectories to search for CMakeLists.
    #
-   set( AMREX_TESTS_SUBDIRS AsyncOut MultiBlock Reinit Amr CLZ Parser Parser2 CTOParFor RoundoffDomain)
+   set( AMREX_TESTS_SUBDIRS Amr AsyncOut CLZ CTOParFor DeviceGlobal
+                            MultiBlock Parser Parser2 Reinit RoundoffDomain)
 
    if (AMReX_PARTICLES)
      list(APPEND AMREX_TESTS_SUBDIRS Particles)
diff --git a/Tests/DeviceGlobal/CMakeLists.txt b/Tests/DeviceGlobal/CMakeLists.txt
new file mode 100644
index 0000000000..990662d406
--- /dev/null
+++ b/Tests/DeviceGlobal/CMakeLists.txt
@@ -0,0 +1,15 @@
+if (( (AMReX_GPU_BACKEND STREQUAL "CUDA") OR
+      (AMReX_GPU_BACKEND STREQUAL "HIP" ) ) AND
+    (NOT AMReX_GPU_RDC))
+   return()
+endif ()
+
+foreach(D IN LISTS AMReX_SPACEDIM)
+    set(_sources     main.cpp global_vars.cpp init.cpp work.cpp)
+    set(_input_files)
+
+    setup_test(${D} _sources _input_files)
+
+    unset(_sources)
+    unset(_input_files)
+endforeach()
diff --git a/Tests/DeviceGlobal/GNUmakefile b/Tests/DeviceGlobal/GNUmakefile
new file mode 100644
index 0000000000..fd5fbd8f2c
--- /dev/null
+++ b/Tests/DeviceGlobal/GNUmakefile
@@ -0,0 +1,23 @@
+AMREX_HOME ?= ../../
+
+DEBUG	= FALSE
+
+DIM	= 3
+
+COMP    = gcc
+
+USE_CUDA  = TRUE
+USE_HIP   = FALSE
+USE_SYCL  = FALSE
+
+USE_MPI   = FALSE
+USE_OMP   = FALSE
+
+BL_NO_FORT = TRUE
+
+include $(AMREX_HOME)/Tools/GNUMake/Make.defs
+
+include ./Make.package
+include $(AMREX_HOME)/Src/Base/Make.package
+
+include $(AMREX_HOME)/Tools/GNUMake/Make.rules
diff --git a/Tests/DeviceGlobal/Make.package b/Tests/DeviceGlobal/Make.package
new file mode 100644
index 0000000000..8df45d1f81
--- /dev/null
+++ b/Tests/DeviceGlobal/Make.package
@@ -0,0 +1,2 @@
+CEXE_sources += main.cpp init.cpp work.cpp global_vars.cpp
+
diff --git a/Tests/DeviceGlobal/global_vars.H b/Tests/DeviceGlobal/global_vars.H
new file mode 100644
index 0000000000..88ce1f0c4f
--- /dev/null
+++ b/Tests/DeviceGlobal/global_vars.H
@@ -0,0 +1,7 @@
+#pragma once
+
+#include <AMReX_Gpu.H>
+#include <AMReX_INT.H>
+
+extern AMREX_DEVICE_GLOBAL_VARIABLE(amrex::Long,    dg_x);
+extern AMREX_DEVICE_GLOBAL_VARIABLE(amrex::Long, 4, dg_y);
diff --git a/Tests/DeviceGlobal/global_vars.cpp b/Tests/DeviceGlobal/global_vars.cpp
new file mode 100644
index 0000000000..485f41f164
--- /dev/null
+++ b/Tests/DeviceGlobal/global_vars.cpp
@@ -0,0 +1,7 @@
+
+#include "global_vars.H"
+
+// definitions of global variables
+
+AMREX_DEVICE_GLOBAL_VARIABLE(amrex::Long,    dg_x);
+AMREX_DEVICE_GLOBAL_VARIABLE(amrex::Long, 4, dg_y);
diff --git a/Tests/DeviceGlobal/init.cpp b/Tests/DeviceGlobal/init.cpp
new file mode 100644
index 0000000000..2850941f1e
--- /dev/null
+++ b/Tests/DeviceGlobal/init.cpp
@@ -0,0 +1,27 @@
+
+#include "global_vars.H"
+
+void init ()
+{
+    amrex::ParallelFor(1, [=] AMREX_GPU_DEVICE (int)
+    {
+        dg_x = 1;
+        for (int n = 0; n < 4; ++n) {
+            dg_y[n] = 100 + n;
+        }
+    });
+
+    amrex::Gpu::streamSynchronize();
+}
+
+void init2 ()
+{
+    amrex::Gpu::PinnedVector<amrex::Long> pv{2,200,201,202,203};
+    amrex::Gpu::memcpy_from_host_to_device_global_async
+        (dg_x, pv.data(), sizeof(amrex::Long));
+    amrex::Gpu::memcpy_from_host_to_device_global_async
+        (dg_y, pv.data()+1, sizeof(amrex::Long));
+    amrex::Gpu::memcpy_from_host_to_device_global_async
+        (dg_y, pv.data()+2, sizeof(amrex::Long)*3, sizeof(amrex::Long));
+    amrex::Gpu::streamSynchronize();
+}
diff --git a/Tests/DeviceGlobal/main.cpp b/Tests/DeviceGlobal/main.cpp
new file mode 100644
index 0000000000..b3b6778472
--- /dev/null
+++ b/Tests/DeviceGlobal/main.cpp
@@ -0,0 +1,20 @@
+#include <AMReX.H>
+#include <AMReX_Print.H>
+
+void init();
+void work();
+void init2();
+void work2();
+
+int main (int argc, char* argv[])
+{
+    amrex::Initialize(argc,argv);
+    {
+        init();
+        work();
+
+        init2();
+        work2();
+    }
+    amrex::Finalize();
+}
diff --git a/Tests/DeviceGlobal/work.cpp b/Tests/DeviceGlobal/work.cpp
new file mode 100644
index 0000000000..8350dad066
--- /dev/null
+++ b/Tests/DeviceGlobal/work.cpp
@@ -0,0 +1,40 @@
+
+#include "global_vars.H"
+
+void work ()
+{
+    amrex::Gpu::PinnedVector<amrex::Long> pv;
+    pv.resize(5,0);
+    auto* p = pv.data();
+    amrex::ParallelFor(1, [=] AMREX_GPU_DEVICE (int)
+    {
+        p[0] = dg_x;
+        for (int n = 0; n < 4; ++n) {
+            p[1+n] = dg_y[n];
+        }
+    });
+    amrex::Gpu::streamSynchronize();
+    AMREX_ALWAYS_ASSERT(pv[0] == 1 &&
+                        pv[1] == 100 &&
+                        pv[2] == 101 &&
+                        pv[3] == 102 &&
+                        pv[4] == 103);
+}
+
+void work2 ()
+{
+    amrex::Gpu::PinnedVector<amrex::Long> pv;
+    pv.resize(5,0);
+    amrex::Gpu::memcpy_from_device_global_to_host_async
+        (pv.data(), dg_x, sizeof(amrex::Long));
+    amrex::Gpu::memcpy_from_device_global_to_host_async
+        (pv.data()+1, dg_y, sizeof(amrex::Long));
+    amrex::Gpu::memcpy_from_device_global_to_host_async
+        (pv.data()+2, dg_y, sizeof(amrex::Long)*3, sizeof(amrex::Long));
+    amrex::Gpu::streamSynchronize();
+    AMREX_ALWAYS_ASSERT(pv[0] == 2 &&
+                        pv[1] == 200 &&
+                        pv[2] == 201 &&
+                        pv[3] == 202 &&
+                        pv[4] == 203);
+}
diff --git a/Tools/GNUMake/comps/dpcpp.mak b/Tools/GNUMake/comps/dpcpp.mak
index 3bcf5cb437..4e9a7e4652 100644
--- a/Tools/GNUMake/comps/dpcpp.mak
+++ b/Tools/GNUMake/comps/dpcpp.mak
@@ -45,6 +45,14 @@ ifeq ($(WARN_ALL),TRUE)
 
   warning_flags += -Wpedantic
 
+  # /tmp/icpx-2d34de0e47/global_vars-header-4390fb.h:25:36: error: zero size arrays are an extension [-Werror,-Wzero-length-array]
+  #    25 | const char* const kernel_names[] = {
+  #       |                                    ^
+  # 1 error generated.
+  #
+  # Seen in oneapi 2024.2.0 after adding Test/DeviceGlobal
+  warning_flags += -Wno-zero-length-array
+
   ifneq ($(WARN_SHADOW),FALSE)
     warning_flags += -Wshadow
   endif

From b7083f0b20d629d2fceacb4768e4c6e7fc80921b Mon Sep 17 00:00:00 2001
From: Weiqun Zhang <WeiqunZhang@lbl.gov>
Date: Wed, 14 Aug 2024 00:16:24 -0500
Subject: [PATCH 2/9] Make sure ChopGrids does not violate refinement ratio.
 (#4078)

When using ChopGrids on fine level BoxArrays, we must make sure the box
sizes are multiples of refinement ratios.
---
 Src/AmrCore/AMReX_AmrMesh.cpp | 18 +++++++++++++++++-
 Src/Base/AMReX_BoxArray.H     |  5 +++++
 Src/Base/AMReX_BoxArray.cpp   | 22 ++++++++++++++++++----
 3 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/Src/AmrCore/AMReX_AmrMesh.cpp b/Src/AmrCore/AMReX_AmrMesh.cpp
index 9dd03d51e4..388ba55dcf 100644
--- a/Src/AmrCore/AMReX_AmrMesh.cpp
+++ b/Src/AmrCore/AMReX_AmrMesh.cpp
@@ -471,6 +471,9 @@ AmrMesh::ChopGrids (int lev, BoxArray& ba, int target_size) const
     IntVect chunk = max_grid_size[lev];
     chunk.min(Geom(lev).Domain().length());
 
+    // Note that ba already satisfies the max_grid_size requirement and it's
+    // coarsenable if it's a fine level BoxArray.
+
     while (ba.size() < target_size)
     {
         IntVect chunk_prev = chunk;
@@ -485,11 +488,24 @@ AmrMesh::ChopGrids (int lev, BoxArray& ba, int target_size) const
             int idim = chunk_dir[idx].second;
             if (refine_grid_layout_dims[idim]) {
                 int new_chunk_size = chunk[idim] / 2;
+                int rr = (lev > 0) ? ref_ratio[lev-1][idim] : 1;
+                if (rr > 1) {
+                    new_chunk_size = (new_chunk_size/rr) * rr;
+                }
                 if (new_chunk_size != 0 &&
                     new_chunk_size%blocking_factor[lev][idim] == 0)
                 {
                     chunk[idim] = new_chunk_size;
-                    ba.maxSize(chunk);
+                    if (rr == 1) {
+                        ba.maxSize(chunk);
+                    } else {
+                        IntVect bf(1);
+                        bf[idim] = rr;
+                        // Note that only idim-direction will be chopped by
+                        // minmaxSize because the sizes in other directions
+                        // are already smaller than chunk.
+                        ba.minmaxSize(bf, chunk);
+                    }
                     break;
                 }
             }
diff --git a/Src/Base/AMReX_BoxArray.H b/Src/Base/AMReX_BoxArray.H
index 19cee3cefb..b3b339c33b 100644
--- a/Src/Base/AMReX_BoxArray.H
+++ b/Src/Base/AMReX_BoxArray.H
@@ -615,6 +615,11 @@ public:
 
     BoxArray& maxSize (const IntVect& block_size);
 
+    //! Forces each Box in BoxArray to have sizes >= min_size and <=
+    //! max_size. It's the caller's responsibility to make sure both the
+    //! BoxArray and max_size are coarsenable by min_size.
+    BoxArray& minmaxSize (const IntVect& min_size, const IntVect& max_size);
+
     //! Refine each Box in the BoxArray to the specified ratio.
     BoxArray& refine (int refinement_ratio);
 
diff --git a/Src/Base/AMReX_BoxArray.cpp b/Src/Base/AMReX_BoxArray.cpp
index ecffd06d8a..7f55cc5b99 100644
--- a/Src/Base/AMReX_BoxArray.cpp
+++ b/Src/Base/AMReX_BoxArray.cpp
@@ -555,12 +555,26 @@ BoxArray::maxSize (const IntVect& block_size)
     blst.maxSize(block_size);
     const int N = static_cast<int>(blst.size());
     if (size() != N) { // If size doesn't change, do nothing.
-        BoxList bak = (m_simplified_list) ? *m_simplified_list : BoxList();
+        auto bak = m_simplified_list;
         define(std::move(blst));
-        if (bak.isNotEmpty()) {
-            m_simplified_list = std::make_shared<BoxList>(std::move(bak));
-        }
+        m_simplified_list = bak;
+    }
+    return *this;
+}
+
+BoxArray&
+BoxArray::minmaxSize (const IntVect& min_size, const IntVect& max_size)
+{
+    AMREX_ASSERT(this->coarsenable(min_size) &&
+                 (max_size/min_size)*min_size == max_size);
+    std::shared_ptr<BoxList> bak;
+    if (m_bat.is_simple() && crseRatio() == IntVect::TheUnitVector()) {
+        bak = m_simplified_list;
     }
+    this->coarsen(min_size);
+    this->maxSize(max_size/min_size);
+    this->refine(min_size);
+    m_simplified_list = bak;
     return *this;
 }
 

From aa280e50fd043f9b47df3745907193a8ca17b568 Mon Sep 17 00:00:00 2001
From: Weiqun Zhang <WeiqunZhang@lbl.gov>
Date: Wed, 14 Aug 2024 10:39:02 -0500
Subject: [PATCH 3/9] Fix warnings by Coverity Scan (#4080)

Use move instead of copy.
---
 Src/Base/AMReX_BoxArray.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/Src/Base/AMReX_BoxArray.cpp b/Src/Base/AMReX_BoxArray.cpp
index 7f55cc5b99..9bca594352 100644
--- a/Src/Base/AMReX_BoxArray.cpp
+++ b/Src/Base/AMReX_BoxArray.cpp
@@ -555,9 +555,10 @@ BoxArray::maxSize (const IntVect& block_size)
     blst.maxSize(block_size);
     const int N = static_cast<int>(blst.size());
     if (size() != N) { // If size doesn't change, do nothing.
-        auto bak = m_simplified_list;
+        std::shared_ptr<BoxList> bak;
+        bak.swap(m_simplified_list);
         define(std::move(blst));
-        m_simplified_list = bak;
+        m_simplified_list = std::move(bak);
     }
     return *this;
 }
@@ -569,12 +570,12 @@ BoxArray::minmaxSize (const IntVect& min_size, const IntVect& max_size)
                  (max_size/min_size)*min_size == max_size);
     std::shared_ptr<BoxList> bak;
     if (m_bat.is_simple() && crseRatio() == IntVect::TheUnitVector()) {
-        bak = m_simplified_list;
+        bak.swap(m_simplified_list);
     }
     this->coarsen(min_size);
     this->maxSize(max_size/min_size);
     this->refine(min_size);
-    m_simplified_list = bak;
+    m_simplified_list = std::move(bak);
     return *this;
 }
 

From 6890ce0d34f72763a533bc6cfe3069494f21329f Mon Sep 17 00:00:00 2001
From: Miren Radia <32646026+mirenradia@users.noreply.github.com>
Date: Wed, 14 Aug 2024 17:39:35 +0100
Subject: [PATCH 4/9] BLBackTrace: Check for `addr2line` in path first (#4079)

## Summary
This fixes #4077 by first checking if `addr2line` or `eu-addr2line` is
in the `PATH` first and then falls back to `/usr/bin/addr2line` or
`/usr/bin/eu-addr2line` if not.

## Additional background
See #4077 for why using the hardcoded path is a problem.
---
 Src/Base/AMReX_BLBackTrace.cpp | 44 +++++++++++++++++++++++++++-------
 1 file changed, 35 insertions(+), 9 deletions(-)

diff --git a/Src/Base/AMReX_BLBackTrace.cpp b/Src/Base/AMReX_BLBackTrace.cpp
index d511a19272..d065bd71c0 100644
--- a/Src/Base/AMReX_BLBackTrace.cpp
+++ b/Src/Base/AMReX_BLBackTrace.cpp
@@ -13,12 +13,13 @@
 #include <AMReX_TinyProfiler.H>
 #endif
 
+#include <csignal>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <fstream>
 #include <iostream>
 #include <sstream>
-#include <fstream>
-#include <cstring>
-#include <cstdio>
-#include <csignal>
 
 #if !(defined(_MSC_VER) && defined(__CUDACC__))
 //MSVC can't pre-processor cfenv with `Zc:preprocessor`
@@ -177,6 +178,18 @@ namespace {
         }
         return r;
     }
+
+#ifdef __linux__
+    bool command_exists(std::string const &cmd)
+    {
+        // command -v is part of POSIX so should be available
+        std::string check_command = "command -v " + cmd + " > /dev/null 2>&1";
+        int r = std::system(check_command.c_str());
+        // return value of std::system is implementation defined and can be
+        // decoded using WEXITSTATUS but it should be 0 on success
+        return r == 0;
+    }
+#endif
 }
 #endif
 
@@ -209,19 +222,32 @@ BLBackTrace::print_backtrace_info (FILE* f)
         int have_addr2line = 0;
         std::string eu_cmd;
         {
-            have_eu_addr2line = file_exists("/usr/bin/eu-addr2line");
+            if (command_exists("eu-addr2line")) {
+                have_eu_addr2line = 1;
+                eu_cmd = "eu-addr2line";
+            } else {
+                std::string eu_fallback_path = "/usr/bin/eu-addr2line";
+                have_eu_addr2line = file_exists(eu_fallback_path.c_str());
+                eu_cmd = std::move(eu_fallback_path);
+            }
             if (have_eu_addr2line) {
                 const pid_t pid = getpid();
                 // cmd = "/usr/bin/eu-addr2line -C -f -i --pretty-print -p "
-                eu_cmd = "/usr/bin/eu-addr2line -C -f -i -p "
-                    + std::to_string(pid);
+                eu_cmd += " -C -f -i -p " + std::to_string(pid);
             }
         }
         std::string cmd;
         {
-            have_addr2line = file_exists("/usr/bin/addr2line");
+            if (command_exists("addr2line")) {
+                have_addr2line = 1;
+                cmd = "addr2line";
+            } else {
+                std::string fallback_path = "/usr/bin/addr2line";
+                have_addr2line = file_exists(fallback_path.c_str());
+                cmd = std::move(fallback_path);
+            }
             if (have_addr2line) {
-                cmd = "/usr/bin/addr2line -Cpfie " + amrex::system::exename;
+                cmd += " -Cpfie " + amrex::system::exename;
             }
         }
 

From 8535675d4aef813693adb47c8703926805f2493a Mon Sep 17 00:00:00 2001
From: Weiqun Zhang <WeiqunZhang@lbl.gov>
Date: Wed, 14 Aug 2024 13:14:30 -0500
Subject: [PATCH 5/9] AMREX_ENUM and ParmParse support for enum class (#4069)

This adds AMREX_ENUM that can be used to define enum class with
reflection. The new feature allows us to support enum class in
ParmParse.
---
 Docs/sphinx_documentation/source/Basics.rst |  50 ++++++++
 Src/Base/AMReX_Enum.H                       |  81 ++++++++++++
 Src/Base/AMReX_ParmParse.H                  | 133 ++++++++++++++++++++
 Src/Base/AMReX_String.H                     |  30 +++++
 Src/Base/AMReX_String.cpp                   |  54 ++++++++
 Src/Base/AMReX_Utility.H                    |  12 +-
 Src/Base/AMReX_Utility.cpp                  |  39 ------
 Src/Base/CMakeLists.txt                     |   3 +
 Src/Base/Make.package                       |   4 +
 Tests/CMakeLists.txt                        |   2 +-
 Tests/Enum/CMakeLists.txt                   |   9 ++
 Tests/Enum/GNUmakefile                      |  24 ++++
 Tests/Enum/Make.package                     |   1 +
 Tests/Enum/inputs                           |  10 ++
 Tests/Enum/main.cpp                         |  99 +++++++++++++++
 15 files changed, 500 insertions(+), 51 deletions(-)
 create mode 100644 Src/Base/AMReX_Enum.H
 create mode 100644 Src/Base/AMReX_String.H
 create mode 100644 Src/Base/AMReX_String.cpp
 create mode 100644 Tests/Enum/CMakeLists.txt
 create mode 100644 Tests/Enum/GNUmakefile
 create mode 100644 Tests/Enum/Make.package
 create mode 100644 Tests/Enum/inputs
 create mode 100644 Tests/Enum/main.cpp

diff --git a/Docs/sphinx_documentation/source/Basics.rst b/Docs/sphinx_documentation/source/Basics.rst
index 5b9b137e30..97e68fd1e6 100644
--- a/Docs/sphinx_documentation/source/Basics.rst
+++ b/Docs/sphinx_documentation/source/Basics.rst
@@ -419,6 +419,56 @@ will become
     foo.a = 2
     foo.b = 2
 
+Enum Class
+----------
+
+.. versionadded:: 24.09
+   Enum class support in :cpp:`ParmParse`.
+
+AMReX provides a macro :cpp:`AMREX_ENUM` for defining :cpp:`enum class` that
+supports reflection. For example,
+
+.. highlight:: c++
+
+::
+
+   AMREX_ENUM(MyColor, red, green, blue);
+
+   void f ()
+   {
+       MyColor color = amrex::getEnum<MyColor>("red"); // MyColor::red
+       std::string name = amrex::getEnumNameString(MyColor::blue); // "blue"
+       std::vector<std::string> names = amrex::getEnumNameStrings<MyColor>();
+       // names = {"red", "green", "blue"};
+       std::string class_name = amrex::getEnumClassName<MyColor>(); // "MyColor"
+   }
+
+This allows us to read :cpp:`ParmParse` parameters into enum class objects.
+
+.. highlight:: python
+
+::
+
+   color1 = red
+   color2 = BLue
+
+The following code shows how to query the enumerators.
+
+.. highlight:: c++
+
+::
+
+   AMREX_ENUM(MyColor, none, red, green, blue);
+
+   void f (MyColor& c1, MyColor& c2)
+   {
+       ParmParse pp;
+       pp.query("color1", c1); // c1 becomes MyColor::red
+       pp.query_enum_case_insensitive("color2", c2); // c2 becomes MyColor::blue
+       MyColor default_color; // MyColor::none
+       pp.query("color3", default_color); // Still MyColor::none
+   }
+
 Overriding Parameters with Command-Line Arguments
 -------------------------------------------------
 
diff --git a/Src/Base/AMReX_Enum.H b/Src/Base/AMReX_Enum.H
new file mode 100644
index 0000000000..09583f5b73
--- /dev/null
+++ b/Src/Base/AMReX_Enum.H
@@ -0,0 +1,81 @@
+#ifndef AMREX_ENUM_H_
+#define AMREX_ENUM_H_
+
+#include <AMReX_String.H>
+
+#include <algorithm>
+#include <array>
+#include <stdexcept>
+#include <string>
+#include <string_view>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+template <typename T>
+using amrex_enum_traits = decltype(amrex_get_enum_traits(std::declval<T>()));
+
+namespace amrex {
+    template <typename T, typename ET = amrex_enum_traits<T>,
+              std::enable_if_t<ET::value,int> = 0>
+    T getEnum (std::string_view const& s)
+    {
+        auto pos = ET::enum_names.find(s);
+        if (pos == std::string_view::npos) {
+            std::string error_msg("amrex::getEnum: Unknown enum: ");
+            error_msg.append(s).append(" in AMREX_ENUM(").append(ET::class_name)
+                .append(", ").append(ET::enum_names).append(").");
+            throw std::runtime_error(error_msg);
+        }
+        auto count = std::count(ET::enum_names.begin(),
+                                ET::enum_names.begin()+pos, ',');
+        return static_cast<T>(count);
+    }
+
+    template <typename T, typename ET = amrex_enum_traits<T>,
+              std::enable_if_t<ET::value,int> = 0>
+    std::string getEnumNameString (T const& v)
+    {
+        auto n = static_cast<int>(v);
+        std::size_t pos = 0;
+        for (int i = 0; i < n; ++i) {
+            pos = ET::enum_names.find(',', pos);
+            if (pos == std::string::npos) {
+                std::string error_msg("amrex::getEnum: Unknown enum value: ");
+                error_msg.append(std::to_string(n)).append(" in AMREX_ENUM(")
+                    .append(ET::class_name).append(", ").append(ET::enum_names)
+                    .append(").");
+                throw std::runtime_error(error_msg);
+            }
+            ++pos;
+        }
+        auto pos2 = ET::enum_names.find(',', pos);
+        return amrex::trim(std::string(ET::enum_names.substr(pos,pos2-pos)));
+    }
+
+    template <typename T, typename ET = amrex_enum_traits<T>,
+              std::enable_if_t<ET::value,int> = 0>
+    std::vector<std::string> getEnumNameStrings ()
+    {
+        return amrex::split(std::string(ET::enum_names), ", ");
+    }
+
+    template <typename T, typename ET = amrex_enum_traits<T>,
+              std::enable_if_t<ET::value,int> = 0>
+    std::string getEnumClassName ()
+    {
+        return std::string(ET::class_name);
+    }
+}
+
+#define AMREX_ENUM(CLASS, ...) \
+    enum class CLASS : int { __VA_ARGS__ }; \
+    struct CLASS##_EnumTraits { \
+        using enum_class_t = CLASS; \
+        static constexpr bool value = true; \
+        static constexpr std::string_view class_name{#CLASS}; \
+        static constexpr std::string_view enum_names{#__VA_ARGS__}; \
+    }; \
+    CLASS##_EnumTraits amrex_get_enum_traits(CLASS)
+
+#endif
diff --git a/Src/Base/AMReX_ParmParse.H b/Src/Base/AMReX_ParmParse.H
index c9e273643e..a844dcc1aa 100644
--- a/Src/Base/AMReX_ParmParse.H
+++ b/Src/Base/AMReX_ParmParse.H
@@ -3,6 +3,7 @@
 #include <AMReX_Config.H>
 
 #include <AMReX_BLassert.H>
+#include <AMReX_Enum.H>
 #include <AMReX_INT.H>
 #include <AMReX_IParser.H>
 #include <AMReX_Parser.H>
@@ -1118,6 +1119,138 @@ public:
         }
     }
 
+    /**
+     * \brief. Query enum value using given name.
+     *
+     * Here T is an enum class defined by AMREX_ENUM. The return value
+     * indicates if `name` is found. An exception is thrown, if the found
+     * string associated with the name cannot be converted to an enumerator
+     * (i.e., the string does not match any names in the definition of T).
+     */
+    template <typename T, typename ET = amrex_enum_traits<T>,
+              std::enable_if_t<ET::value,int> = 0>
+    int query (const char* name, T& ref)
+    {
+        std::string s;
+        int exist = this->query(name, s);
+        if (exist) {
+            try {
+                ref = amrex::getEnum<T>(s);
+            } catch (...) {
+                throw;
+            }
+        }
+        return exist;
+    }
+
+    /**
+     * \brief. Get enum value using given name.
+     *
+     * Here T is an enum class defined by AMREX_ENUM. It's a runtime error,
+     * if `name` is not found. An exception is thrown, if the found string
+     * associated with the name cannot be converted to an enumerator (i.e.,
+     * the string does not match any names in the definition of T).
+     */
+    template <typename T, typename ET = amrex_enum_traits<T>,
+              std::enable_if_t<ET::value,int> = 0>
+    void get (const char* name, T& ref)
+    {
+        std::string s;
+        this->get(name, s);
+        try {
+            ref = amrex::getEnum<T>(s);
+        } catch (...) {
+            throw;
+        }
+    }
+
+    //! Query an array of enum values using given name.
+    template <typename T, typename ET = amrex_enum_traits<T>,
+              std::enable_if_t<ET::value,int> = 0>
+    int queryarr (const char* name, std::vector<T>& ref)
+    {
+        std::vector<std::string> s;
+        int exist = this->queryarr(name, s);
+        if (exist) {
+            ref.resize(s.size());
+            for (std::size_t i = 0; i < s.size(); ++i) {
+                ref[i] = amrex::getEnum<T>(s[i]);
+            }
+        }
+        return exist;
+    }
+
+    //! Get an array of enum values using given name.
+    template <typename T, typename ET = amrex_enum_traits<T>,
+              std::enable_if_t<ET::value,int> = 0>
+    void getarr (const char* name, std::vector<T>& ref)
+    {
+        std::vector<std::string> s;
+        this->getarr(name, s);
+        ref.resize(s.size());
+        for (std::size_t i = 0; i < s.size(); ++i) {
+            ref[i] = amrex::getEnum<T>(s[i]);
+        }
+    }
+
+    /**
+     * \brief. Query enum value using given name.
+     *
+     * Here T is an enum class defined by AMREX_ENUM. The return value
+     * indicates if `name` is found. An exception is thrown, if the found
+     * string associated with the name cannot be case-insensitively
+     * converted to an enumerator (i.e., the found string, not `name`, does
+     * not case-insensitively match any names in the definition of T). If
+     * there are multiple matches, the first one is used.
+     */
+    template <typename T, typename ET = amrex_enum_traits<T>,
+              std::enable_if_t<ET::value,int> = 0>
+    int query_enum_case_insensitive (const char* name, T& ref)
+    {
+        std::string s;
+        int exist = this->query(name, s);
+        if (exist) {
+            s = amrex::toLower(s);
+            auto const& enum_names = amrex::getEnumNameStrings<T>();
+            auto found = std::find_if(enum_names.begin(), enum_names.end(),
+                                      [&] (std::string const& ename) {
+                                          return amrex::toLower(ename) == s;
+                                      });
+            if (found != enum_names.end()) {
+                ref = static_cast<T>(std::distance(enum_names.begin(), found));
+            } else {
+                std::string msg("query_enum_case_insensitive(\"");
+                msg.append(name).append("\",").append(amrex::getEnumClassName<T>())
+                    .append("&) failed.");
+                throw std::runtime_error(msg);
+            }
+        }
+        return exist;
+    }
+
+    /**
+     * \brief. Get enum value using given name.
+     *
+     * Here T is an enum class defined by AMREX_ENUM. It's a runtime error,
+     * if `name` is not found. An exception is thrown, if the found string
+     * associated with the name cannot be case-insensitively converted to an
+     * enumerator (i.e., the found string, not `name`, does not
+     * case-insensitively match any names in the definition of T). If there
+     * are multiple matches, the first one is used.
+     */
+    template <typename T, typename ET = amrex_enum_traits<T>,
+              std::enable_if_t<ET::value,int> = 0>
+    void get_enum_case_insensitive (const char* name, T& ref)
+    {
+        int exist = this->query_enum_case_insensitive(name, ref);
+        if (!exist) {
+            std::string msg("get_enum_case_insensitive(\"");
+            msg.append(name).append("\",").append(amrex::getEnumClassName<T>())
+                .append("&) failed.");
+            amrex::Abort(msg);
+        }
+    }
+
     //! Remove given name from the table.
     int remove (const char* name);
 
diff --git a/Src/Base/AMReX_String.H b/Src/Base/AMReX_String.H
new file mode 100644
index 0000000000..147b7ab187
--- /dev/null
+++ b/Src/Base/AMReX_String.H
@@ -0,0 +1,30 @@
+#ifndef AMREX_STRING_H_
+#define AMREX_STRING_H_
+#include <AMReX_Config.H>
+
+#include <string>
+#include <vector>
+
+namespace amrex {
+
+    //! Converts all characters of the string into lower case based on std::locale
+    std::string toLower (std::string s);
+
+    //! Converts all characters of the string into uppercase based on std::locale
+    std::string toUpper (std::string s);
+
+    //! Trim leading and trailing characters in the optional `space`
+    //! argument.
+    std::string trim (std::string s, std::string const& space = " \t");
+
+    //! Returns rootNNNN where NNNN == num.
+    std::string Concatenate (const std::string& root,
+                             int                num,
+                             int                mindigits = 5);
+
+    //! Split a string using given tokens in `sep`.
+    std::vector<std::string> split (std::string const& s,
+                                    std::string const& sep = " \t");
+}
+
+#endif
diff --git a/Src/Base/AMReX_String.cpp b/Src/Base/AMReX_String.cpp
new file mode 100644
index 0000000000..24dbce4532
--- /dev/null
+++ b/Src/Base/AMReX_String.cpp
@@ -0,0 +1,54 @@
+#include <AMReX_String.H>
+#include <AMReX_BLassert.H>
+
+#include <algorithm>
+#include <cctype>
+#include <iomanip>
+#include <sstream>
+
+namespace amrex {
+
+std::string toLower (std::string s)
+{
+    std::transform(s.begin(), s.end(), s.begin(),
+                   [](unsigned char c) { return std::tolower(c); });
+    return s;
+}
+
+std::string toUpper (std::string s)
+{
+    std::transform(s.begin(), s.end(), s.begin(),
+                   [](unsigned char c) { return std::toupper(c); });
+    return s;
+}
+
+std::string trim(std::string s, std::string const& space)
+{
+    const auto sbegin = s.find_first_not_of(space);
+    if (sbegin == std::string::npos) { return std::string{}; }
+    const auto send = s.find_last_not_of(space);
+    s = s.substr(sbegin, send-sbegin+1);
+    return s;
+}
+
+std::string Concatenate (const std::string& root, int num, int mindigits)
+{
+    BL_ASSERT(mindigits >= 0);
+    std::stringstream result;
+    result << root << std::setfill('0') << std::setw(mindigits) << num;
+    return result.str();
+}
+
+std::vector<std::string> split (std::string const& s, std::string const& sep)
+{
+    std::vector<std::string> result;
+    std::size_t pos_begin, pos_end = 0;
+    while ((pos_begin = s.find_first_not_of(sep,pos_end)) != std::string::npos) {
+        pos_end = s.find_first_of(sep,pos_begin);
+        result.push_back(s.substr(pos_begin,pos_end-pos_begin));
+        if (pos_end == std::string::npos) { break; }
+    }
+    return result;
+}
+
+}
diff --git a/Src/Base/AMReX_Utility.H b/Src/Base/AMReX_Utility.H
index 016b8adb0e..6bec276dbf 100644
--- a/Src/Base/AMReX_Utility.H
+++ b/Src/Base/AMReX_Utility.H
@@ -15,6 +15,7 @@
 #include <AMReX_Random.H>
 #include <AMReX_GpuQualifiers.H>
 #include <AMReX_FileSystem.H>
+#include <AMReX_String.H>
 
 #include <cfloat>
 #include <chrono>
@@ -44,17 +45,6 @@ namespace amrex
     const std::vector<std::string>& Tokenize (const std::string& instr,
                                               const std::string& separators);
 
-    //! Converts all characters of the string into lower or uppercase based on std::locale
-    std::string toLower (std::string s);
-    std::string toUpper (std::string s);
-
-    //! Trim leading and trailing white space
-    std::string trim (std::string s, std::string const& space = " \t");
-
-    //! Returns rootNNNN where NNNN == num.
-    std::string Concatenate (const std::string& root,
-                             int                num,
-                             int                mindigits = 5);
     /**
     *  \brief Creates the specified directories.  path may be either a full pathname
     *  or a relative pathname.  It will create all the directories in the
diff --git a/Src/Base/AMReX_Utility.cpp b/Src/Base/AMReX_Utility.cpp
index 1c79dfba92..aa3d8a2d16 100644
--- a/Src/Base/AMReX_Utility.cpp
+++ b/Src/Base/AMReX_Utility.cpp
@@ -16,7 +16,6 @@
 #include <cerrno>
 #include <cstdlib>
 #include <cstring>
-#include <cctype>
 #include <cmath>
 #include <cstdio>
 #include <ctime>
@@ -113,44 +112,6 @@ amrex::Tokenize (const std::string& instr,
     return tokens;
 }
 
-std::string
-amrex::toLower (std::string s)
-{
-    std::transform(s.begin(), s.end(), s.begin(),
-                   [](unsigned char c) { return std::tolower(c); });
-    return s;
-}
-
-std::string
-amrex::toUpper (std::string s)
-{
-    std::transform(s.begin(), s.end(), s.begin(),
-                   [](unsigned char c) { return std::toupper(c); });
-    return s;
-}
-
-std::string
-amrex::trim(std::string s, std::string const& space)
-{
-    const auto sbegin = s.find_first_not_of(space);
-    if (sbegin == std::string::npos) { return std::string{}; }
-    const auto send = s.find_last_not_of(space);
-    s = s.substr(sbegin, send-sbegin+1);
-    return s;
-}
-
-std::string
-amrex::Concatenate (const std::string& root,
-                     int                num,
-                     int                mindigits)
-{
-    BL_ASSERT(mindigits >= 0);
-    std::stringstream result;
-    result << root << std::setfill('0') << std::setw(mindigits) << num;
-    return result.str();
-}
-
-
 bool
 amrex::UtilCreateDirectory (const std::string& path,
                             mode_t mode, bool verbose)
diff --git a/Src/Base/CMakeLists.txt b/Src/Base/CMakeLists.txt
index cebd1f9bce..0436ad032e 100644
--- a/Src/Base/CMakeLists.txt
+++ b/Src/Base/CMakeLists.txt
@@ -12,6 +12,7 @@ foreach(D IN LISTS AMReX_SPACEDIM)
        AMReX_Array.H
        AMReX_BlockMutex.H
        AMReX_BlockMutex.cpp
+       AMReX_Enum.H
        AMReX_GpuComplex.H
        AMReX_Vector.H
        AMReX_TableData.H
@@ -30,6 +31,8 @@ foreach(D IN LISTS AMReX_SPACEDIM)
        AMReX_parmparse_fi.cpp
        AMReX_ParmParse.H
        AMReX_Functional.H
+       AMReX_String.H
+       AMReX_String.cpp
        AMReX_Utility.H
        AMReX_Utility.cpp
        AMReX_FileSystem.H
diff --git a/Src/Base/Make.package b/Src/Base/Make.package
index dfbfb4f03a..b009ebf7d6 100644
--- a/Src/Base/Make.package
+++ b/Src/Base/Make.package
@@ -2,6 +2,7 @@
 AMREX_BASE=EXE
 
 C$(AMREX_BASE)_headers += AMReX_ccse-mpi.H AMReX_Algorithm.H AMReX_Any.H AMReX_Array.H
+C$(AMREX_BASE)_headers += AMReX_Enum.H
 C$(AMREX_BASE)_headers += AMReX_Vector.H AMReX_TableData.H AMReX_Tuple.H AMReX_Math.H
 
 C$(AMREX_BASE)_headers += AMReX_TypeList.H
@@ -22,6 +23,9 @@ C$(AMREX_BASE)_sources += AMReX_PODVector.cpp
 C$(AMREX_BASE)_headers += AMReX_BlockMutex.H
 C$(AMREX_BASE)_sources += AMReX_BlockMutex.cpp
 
+C$(AMREX_BASE)_headers += AMReX_String.H
+C$(AMREX_BASE)_sources += AMReX_String.cpp
+
 C$(AMREX_BASE)_sources += AMReX_ParmParse.cpp AMReX_parmparse_fi.cpp AMReX_Utility.cpp
 C$(AMREX_BASE)_headers += AMReX_ParmParse.H AMReX_Utility.H AMReX_BLassert.H AMReX_ArrayLim.H
 C$(AMREX_BASE)_headers += AMReX_Functional.H AMReX_Reduce.H AMReX_Scan.H AMReX_Partition.H
diff --git a/Tests/CMakeLists.txt b/Tests/CMakeLists.txt
index 01f187b664..3f801cde2b 100644
--- a/Tests/CMakeLists.txt
+++ b/Tests/CMakeLists.txt
@@ -121,7 +121,7 @@ else()
    #
    # List of subdirectories to search for CMakeLists.
    #
-   set( AMREX_TESTS_SUBDIRS Amr AsyncOut CLZ CTOParFor DeviceGlobal
+   set( AMREX_TESTS_SUBDIRS Amr AsyncOut CLZ CTOParFor DeviceGlobal Enum
                             MultiBlock Parser Parser2 Reinit RoundoffDomain)
 
    if (AMReX_PARTICLES)
diff --git a/Tests/Enum/CMakeLists.txt b/Tests/Enum/CMakeLists.txt
new file mode 100644
index 0000000000..9c0e7f321d
--- /dev/null
+++ b/Tests/Enum/CMakeLists.txt
@@ -0,0 +1,9 @@
+foreach(D IN LISTS AMReX_SPACEDIM)
+    set(_sources     main.cpp)
+    set(_input_files inputs)
+
+    setup_test(${D} _sources _input_files)
+
+    unset(_sources)
+    unset(_input_files)
+endforeach()
diff --git a/Tests/Enum/GNUmakefile b/Tests/Enum/GNUmakefile
new file mode 100644
index 0000000000..d0d895ff52
--- /dev/null
+++ b/Tests/Enum/GNUmakefile
@@ -0,0 +1,24 @@
+AMREX_HOME := ../..
+
+DEBUG	= FALSE
+
+DIM	= 3
+
+COMP    = gcc
+
+USE_MPI   = FALSE
+USE_OMP   = FALSE
+USE_CUDA  = FALSE
+USE_HIP   = FALSE
+USE_SYCL  = FALSE
+
+BL_NO_FORT = TRUE
+
+TINY_PROFILE = FALSE
+
+include $(AMREX_HOME)/Tools/GNUMake/Make.defs
+
+include ./Make.package
+include $(AMREX_HOME)/Src/Base/Make.package
+
+include $(AMREX_HOME)/Tools/GNUMake/Make.rules
diff --git a/Tests/Enum/Make.package b/Tests/Enum/Make.package
new file mode 100644
index 0000000000..6b4b865e8f
--- /dev/null
+++ b/Tests/Enum/Make.package
@@ -0,0 +1 @@
+CEXE_sources += main.cpp
diff --git a/Tests/Enum/inputs b/Tests/Enum/inputs
new file mode 100644
index 0000000000..0972043658
--- /dev/null
+++ b/Tests/Enum/inputs
@@ -0,0 +1,10 @@
+
+color1 = red
+color2 = green
+color3 = blue
+color4 = greenxxx
+color5 = Blue
+
+colors = cyan yellow orange
+
+
diff --git a/Tests/Enum/main.cpp b/Tests/Enum/main.cpp
new file mode 100644
index 0000000000..6fb25b01a5
--- /dev/null
+++ b/Tests/Enum/main.cpp
@@ -0,0 +1,99 @@
+#include <AMReX.H>
+#include <AMReX_ParmParse.H>
+#include <AMReX_Print.H>
+
+using namespace amrex;
+
+AMREX_ENUM(MyColor,  red,  green, blue );
+
+namespace my_namespace {
+    AMREX_ENUM(MyColor, orange,     yellow,cyan );
+}
+
+int main (int argc, char* argv[])
+{
+    amrex::Initialize(argc, argv);
+    {
+        auto const& names = amrex::getEnumNameStrings<MyColor>();
+        auto const& names2 = amrex::getEnumNameStrings<my_namespace::MyColor>();
+        amrex::Print() << "colors:";
+        for (auto const& name : names) {
+            amrex::Print() << " " << name;
+        }
+        amrex::Print() << "\n";
+        amrex::Print() << "colors:";
+        for (auto const& name : names2) {
+            amrex::Print() << " " << name;
+        }
+        amrex::Print() << "\n";
+
+        ParmParse pp;
+        {
+            auto color = static_cast<MyColor>(999);
+            pp.query("color1", color);
+            amrex::Print() << "color = " << amrex::getEnumNameString(color) << '\n';
+            AMREX_ALWAYS_ASSERT(color == MyColor::red);
+        }
+        {
+            auto color = static_cast<MyColor>(999);
+            pp.get("color2", color);
+            amrex::Print() << "color = " << amrex::getEnumNameString(color) << '\n';
+            AMREX_ALWAYS_ASSERT(color == MyColor::green);
+        }
+        {
+            auto color = static_cast<MyColor>(999);
+            pp.get("color3", color);
+            amrex::Print() << "color = " << amrex::getEnumNameString(color) << '\n';
+            AMREX_ALWAYS_ASSERT(color == MyColor::blue);
+        }
+        {
+            auto color = static_cast<MyColor>(999);
+            try {
+                pp.query("color4", color);
+            } catch (std::runtime_error const& e) {
+                amrex::Print() << "As expected, " << e.what() << '\n';
+            }
+            AMREX_ALWAYS_ASSERT(color == static_cast<MyColor>(999));
+            try {
+                pp.get_enum_case_insensitive("color4", color);
+            } catch (std::runtime_error const& e) {
+                amrex::Print() << "As expected, " << e.what() << '\n';
+            }
+            AMREX_ALWAYS_ASSERT(color == static_cast<MyColor>(999));
+        }
+        {
+            auto color = static_cast<MyColor>(999);
+            try {
+                pp.query("color5", color);
+            } catch (std::runtime_error const& e) {
+                amrex::Print() << "As expected, " << e.what() << '\n';
+            }
+            AMREX_ALWAYS_ASSERT(color == static_cast<MyColor>(999));
+            pp.query_enum_case_insensitive("color5", color);
+            amrex::Print() << "color = " << amrex::getEnumNameString(color) << '\n';
+            AMREX_ALWAYS_ASSERT(color == MyColor::blue);
+        }
+        {
+            std::vector<my_namespace::MyColor> color;
+            pp.getarr("colors", color);
+            AMREX_ALWAYS_ASSERT(color.size() == 3 &&
+                                color[0] == my_namespace::MyColor::cyan &&
+                                color[1] == my_namespace::MyColor::yellow &&
+                                color[2] == my_namespace::MyColor::orange);
+            std::vector<my_namespace::MyColor> color2;
+            pp.queryarr("colors", color2);
+            AMREX_ALWAYS_ASSERT(color.size() == 3 &&
+                                color == color2 &&
+                                color[0] == my_namespace::MyColor::cyan &&
+                                color[1] == my_namespace::MyColor::yellow &&
+                                color[2] == my_namespace::MyColor::orange);
+            amrex::Print() << "colors:";
+            for (auto const& c : color) {
+                amrex::Print() << " " << amrex::getEnumNameString(c);
+            }
+            amrex::Print() << "\n";
+        }
+    }
+
+    amrex::Finalize();
+}

From b82296fbf8ff30a47eeb143575663080a284feda Mon Sep 17 00:00:00 2001
From: Weiqun Zhang <WeiqunZhang@lbl.gov>
Date: Thu, 15 Aug 2024 15:39:27 -0500
Subject: [PATCH 6/9] MLEBNodeFDLaplacian: Make it work with AMREX_USE_EB but
 with no EB (#4083)

This will allow the use of the linear solver when AMReX is built with EB
support at compile time but no EB is built at runtime.

X-Ref: https://github.com/ECP-WarpX/WarpX/pull/4865
---
 .../MLMG/AMReX_MLEBNodeFDLaplacian.cpp        | 82 +++++++++++--------
 1 file changed, 48 insertions(+), 34 deletions(-)

diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp
index 09bb6d87e1..eaaffd97cf 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp
+++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp
@@ -5,6 +5,7 @@
 #include <AMReX_MultiFabUtil.H>
 
 #ifdef AMREX_USE_EB
+#include <AMReX_EB2.H>
 #include <AMReX_EBMultiFabUtil.H>
 #endif
 
@@ -151,10 +152,14 @@ MLEBNodeFDLaplacian::define (const Vector<Geometry>& a_geom,
 std::unique_ptr<FabFactory<FArrayBox> >
 MLEBNodeFDLaplacian::makeFactory (int amrlev, int mglev) const
 {
-    return makeEBFabFactory(m_geom[amrlev][mglev],
-                            m_grids[amrlev][mglev],
-                            m_dmap[amrlev][mglev],
-                            {1,1,1}, EBSupport::full);
+    if (EB2::TopIndexSpaceIfPresent()) {
+        return makeEBFabFactory(m_geom[amrlev][mglev],
+                                m_grids[amrlev][mglev],
+                                m_dmap[amrlev][mglev],
+                                {1,1,1}, EBSupport::full);
+    } else {
+        return MLNodeLinOp::makeFactory(amrlev, mglev);
+    }
 }
 #endif
 
@@ -264,17 +269,19 @@ MLEBNodeFDLaplacian::prepareForSolve ()
     for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) {
         for (int mglev = 0; mglev < m_num_mg_levels[amrlev]; ++mglev) {
             const auto *factory = dynamic_cast<EBFArrayBoxFactory const*>(m_factory[amrlev][mglev].get());
-            auto const& levset_mf = factory->getLevelSet();
-            auto const& levset_ar = levset_mf.const_arrays();
-            auto& dmask_mf = *m_dirichlet_mask[amrlev][mglev];
-            auto const& dmask_ar = dmask_mf.arrays();
-            amrex::ParallelFor(dmask_mf,
-            [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
-            {
-                if (levset_ar[box_no](i,j,k) >= Real(0.0)) {
-                    dmask_ar[box_no](i,j,k) = -1;
-                }
-            });
+            if (factory) {
+                auto const& levset_mf = factory->getLevelSet();
+                auto const& levset_ar = levset_mf.const_arrays();
+                auto& dmask_mf = *m_dirichlet_mask[amrlev][mglev];
+                auto const& dmask_ar = dmask_mf.arrays();
+                amrex::ParallelFor(dmask_mf,
+                [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
+                {
+                    if (levset_ar[box_no](i,j,k) >= Real(0.0)) {
+                        dmask_ar[box_no](i,j,k) = -1;
+                    }
+                });
+            }
         }
     }
 #endif
@@ -359,8 +366,10 @@ MLEBNodeFDLaplacian::prepareForSolve ()
 void
 MLEBNodeFDLaplacian::scaleRHS (int amrlev, MultiFab& rhs) const
 {
-    auto const& dmask = *m_dirichlet_mask[amrlev][0];
     const auto *factory = dynamic_cast<EBFArrayBoxFactory const*>(m_factory[amrlev][0].get());
+    if (!factory) { return; }
+
+    auto const& dmask = *m_dirichlet_mask[amrlev][0];
     auto const& edgecent = factory->getEdgeCent();
 
 #ifdef AMREX_USE_OMP
@@ -407,9 +416,10 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa
 #ifdef AMREX_USE_EB
     const auto phieb = (m_in_solution_mode) ? m_s_phi_eb : Real(0.0);
     const auto *factory = dynamic_cast<EBFArrayBoxFactory const*>(m_factory[amrlev][mglev].get());
-    auto const& edgecent = factory->getEdgeCent();
-    auto const& levset_mf = factory->getLevelSet();
-    auto const& volfrac = factory->getVolFrac();
+    Array<const MultiCutFab*,AMREX_SPACEDIM> edgecent {AMREX_D_DECL(nullptr,nullptr,nullptr)};
+    if (factory) {
+        edgecent = factory->getEdgeCent();
+    }
 #endif
 
 #ifdef AMREX_USE_OMP
@@ -422,12 +432,12 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa
         Array4<Real> const& yarr = out.array(mfi);
         Array4<int const> const& dmarr = dmask.const_array(mfi);
 #ifdef AMREX_USE_EB
-        bool cutfab = edgecent[0]->ok(mfi);
-        if (cutfab) {
+        bool cutfab = edgecent[0] && edgecent[0]->ok(mfi);
+        if (cutfab && factory) { // clang-tidy is not that smart
             AMREX_D_TERM(Array4<Real const> const& ecx = edgecent[0]->const_array(mfi);,
                          Array4<Real const> const& ecy = edgecent[1]->const_array(mfi);,
                          Array4<Real const> const& ecz = edgecent[2]->const_array(mfi));
-            auto const& levset = levset_mf.const_array(mfi);
+            auto const& levset = factory->getVolFrac().const_array(mfi);
             if (phieb == std::numeric_limits<Real>::lowest()) {
                 auto const& phiebarr = m_phi_eb[amrlev].const_array(mfi);
 #if (AMREX_SPACEDIM == 2)
@@ -441,7 +451,7 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa
 #endif
                 if (m_has_sigma_mf) {
                     auto const& sigarr = m_sigma_mf[amrlev][mglev]->const_array(mfi);
-                    auto const& vfrc = volfrac.const_array(mfi);
+                    auto const& vfrc = factory->getVolFrac().const_array(mfi);
                     AMREX_HOST_DEVICE_FOR_3D(box, i, j, k,
                     {
                         mlebndfdlap_sig_adotx_eb(i,j,k,yarr,xarr,levset,dmarr,AMREX_D_DECL(ecx,ecy,ecz),
@@ -466,7 +476,7 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa
 #endif
                 if (m_has_sigma_mf) {
                     auto const& sigarr = m_sigma_mf[amrlev][mglev]->const_array(mfi);
-                    auto const& vfrc = volfrac.const_array(mfi);
+                    auto const& vfrc = factory->getVolFrac().const_array(mfi);
                     AMREX_HOST_DEVICE_FOR_3D(box, i, j, k,
                     {
                         mlebndfdlap_sig_adotx_eb(i,j,k,yarr,xarr,levset,dmarr,AMREX_D_DECL(ecx,ecy,ecz),
@@ -533,9 +543,10 @@ MLEBNodeFDLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiF
 
 #ifdef AMREX_USE_EB
         const auto *factory = dynamic_cast<EBFArrayBoxFactory const*>(m_factory[amrlev][mglev].get());
-        auto const& edgecent = factory->getEdgeCent();
-        auto const& levset_mf = factory->getLevelSet();
-        auto const& volfrac = factory->getVolFrac();
+        Array<const MultiCutFab*,AMREX_SPACEDIM> edgecent {AMREX_D_DECL(nullptr,nullptr,nullptr)};
+        if (factory) {
+            edgecent = factory->getEdgeCent();
+        }
 #endif
 
 #ifdef AMREX_USE_OMP
@@ -548,12 +559,12 @@ MLEBNodeFDLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiF
             Array4<Real const> const& rhsarr = rhs.const_array(mfi);
             Array4<int const> const& dmskarr = dmask.const_array(mfi);
 #ifdef AMREX_USE_EB
-            bool cutfab = edgecent[0]->ok(mfi);
-            if (cutfab) {
+            bool cutfab = edgecent[0] && edgecent[0]->ok(mfi);
+            if (cutfab && factory) { // clang-tidy is not that smart
                 AMREX_D_TERM(Array4<Real const> const& ecx = edgecent[0]->const_array(mfi);,
                              Array4<Real const> const& ecy = edgecent[1]->const_array(mfi);,
                              Array4<Real const> const& ecz = edgecent[2]->const_array(mfi));
-                auto const& levset = levset_mf.const_array(mfi);
+                auto const& levset = factory->getLevelSet().const_array(mfi);
 #if (AMREX_SPACEDIM == 2)
                 if (m_rz) {
                     AMREX_HOST_DEVICE_FOR_3D(box, i, j, k,
@@ -565,7 +576,7 @@ MLEBNodeFDLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiF
 #endif
                 if (m_has_sigma_mf) {
                     auto const& sigarr = m_sigma_mf[amrlev][mglev]->const_array(mfi);
-                    auto const& vfrc = volfrac.const_array(mfi);
+                    auto const& vfrc = factory->getVolFrac().const_array(mfi);
                     AMREX_HOST_DEVICE_FOR_3D(box, i, j, k,
                     {
                         mlebndfdlap_sig_gsrb_eb(i,j,k,solarr,rhsarr,levset,dmskarr,AMREX_D_DECL(ecx,ecy,ecz),
@@ -641,8 +652,10 @@ MLEBNodeFDLaplacian::compGrad (int amrlev, const Array<MultiFab*,AMREX_SPACEDIM>
     auto const& dmask = *m_dirichlet_mask[amrlev][mglev];
     const auto phieb = m_s_phi_eb;
     const auto *factory = dynamic_cast<EBFArrayBoxFactory const*>(m_factory[amrlev][mglev].get());
-    AMREX_ASSERT(factory);
-    auto const& edgecent = factory->getEdgeCent();
+    Array<const MultiCutFab*,AMREX_SPACEDIM> edgecent {AMREX_D_DECL(nullptr,nullptr,nullptr)};
+    if (factory) {
+        edgecent = factory->getEdgeCent();
+    }
 #endif
 
 #ifdef AMREX_USE_OMP
@@ -659,7 +672,7 @@ MLEBNodeFDLaplacian::compGrad (int amrlev, const Array<MultiFab*,AMREX_SPACEDIM>
                      Array4<Real> const& gpz = grad[2]->array(mfi);)
 #ifdef AMREX_USE_EB
         Array4<int const> const& dmarr = dmask.const_array(mfi);
-        bool cutfab = edgecent[0]->ok(mfi);
+        bool cutfab = edgecent[0] && edgecent[0]->ok(mfi);
         AMREX_D_TERM(Array4<Real const> const& ecx
                          = cutfab ? edgecent[0]->const_array(mfi) : Array4<Real const>{};,
                      Array4<Real const> const& ecy
@@ -741,6 +754,7 @@ MLEBNodeFDLaplacian::postSolve (Vector<MultiFab>& sol) const
     for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) {
         const auto phieb = m_s_phi_eb;
         const auto *factory = dynamic_cast<EBFArrayBoxFactory const*>(m_factory[amrlev][0].get());
+        if (!factory) { return; }
         auto const& levset_mf = factory->getLevelSet();
         auto const& levset_ar = levset_mf.const_arrays();
         MultiFab& mf = sol[amrlev];

From d9919c92db09ed9e927f9e4c7fb2ddcc161ab5ed Mon Sep 17 00:00:00 2001
From: Axel Huebl <axel.huebl@plasma.ninja>
Date: Thu, 15 Aug 2024 18:04:52 -0700
Subject: [PATCH 7/9] MLEBNodeFDLaplacian: Regression (#4085)

## Summary

Fix copy-paste regression from #4083

## Additional background

https://github.com/AMReX-Codes/amrex/pull/4083#discussion_r1719149149
---
 Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp
index eaaffd97cf..af4a6a6d74 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp
+++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp
@@ -437,7 +437,7 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa
             AMREX_D_TERM(Array4<Real const> const& ecx = edgecent[0]->const_array(mfi);,
                          Array4<Real const> const& ecy = edgecent[1]->const_array(mfi);,
                          Array4<Real const> const& ecz = edgecent[2]->const_array(mfi));
-            auto const& levset = factory->getVolFrac().const_array(mfi);
+            auto const& levset = factory->getLevelSet().const_array(mfi);
             if (phieb == std::numeric_limits<Real>::lowest()) {
                 auto const& phiebarr = m_phi_eb[amrlev].const_array(mfi);
 #if (AMREX_SPACEDIM == 2)

From c061a14011d4d58755f330d15a20f7243863a90d Mon Sep 17 00:00:00 2001
From: Roelof Groenewald <40245517+roelof-groenewald@users.noreply.github.com>
Date: Fri, 16 Aug 2024 16:48:18 -0700
Subject: [PATCH 8/9] Add overload for `getParticleCell` that returns local
 cell index (#4081)

## Summary
In WarpX we have a few routines that requires the local cell index of
particles (i.e. the cell number in the current box). The AMReX function
`getParticleCell` currently only has logic to return the global cell
index (relative to the domain low end). This PR adds a new overload for
that function which does not shift the cell index based on the global
box index.

## Additional background
See https://github.com/ECP-WarpX/WarpX/pull/5118 for WarpX context.

## Checklist

The proposed changes:
- [ ] fix a bug or incorrect behavior in AMReX
- [x] add new capabilities to AMReX
- [ ] changes answers in the test suite to more than roundoff level
- [ ] are likely to significantly affect the results of downstream AMReX
users
- [ ] include documentation in the code and/or rst files, if appropriate
---
 Src/Particle/AMReX_ParticleUtil.H | 41 +++++++++++++++++++++++++++++--
 1 file changed, 39 insertions(+), 2 deletions(-)

diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H
index 6b31db7e99..b09f1d3583 100644
--- a/Src/Particle/AMReX_ParticleUtil.H
+++ b/Src/Particle/AMReX_ParticleUtil.H
@@ -356,17 +356,54 @@ struct GetParticleBin
     }
 };
 
+/**
+ * \brief Returns the cell index for a given particle using the
+ * provided lower bounds and cell sizes.
+ *
+ * This version indexes cells starting from 0 at the lower left corner of
+ * the provided lower bounds, i.e., it returns a local index.
+ *
+ * \tparam P a type of AMReX particle.
+ *
+ * \param p the particle for which the cell index is calculated
+ * \param plo the low end of the domain
+ * \param dxi cell sizes in each dimension
+ */
 template <typename P>
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
 IntVect getParticleCell (P const& p,
                          amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& plo,
-                         amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& dxi,
-                         const Box& domain) noexcept
+                         amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& dxi) noexcept
 {
     IntVect iv(
         AMREX_D_DECL(int(amrex::Math::floor((p.pos(0)-plo[0])*dxi[0])),
                      int(amrex::Math::floor((p.pos(1)-plo[1])*dxi[1])),
                      int(amrex::Math::floor((p.pos(2)-plo[2])*dxi[2]))));
+    return iv;
+}
+
+/**
+ * \brief Returns the cell index for a given particle using the
+ * provided lower bounds, cell sizes and global domain offset.
+ *
+ * This version indexes cells starting from 0 at the lower left corner of
+ * the simulation geometry, i.e., it returns a global index.
+ *
+ * \tparam P a type of AMReX particle.
+ *
+ * \param p the particle for which the cell index is calculated
+ * \param plo the low end of the domain
+ * \param dxi cell sizes in each dimension
+ * \param domain AMReX box in which the given particle resides
+ */
+template <typename P>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+IntVect getParticleCell (P const& p,
+                         amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& plo,
+                         amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& dxi,
+                         const Box& domain) noexcept
+{
+    IntVect iv = getParticleCell(p, plo, dxi);
     iv += domain.smallEnd();
     return iv;
 }

From 8222a13de06d3fe063a1f635623348905c2475f8 Mon Sep 17 00:00:00 2001
From: Weiqun Zhang <WeiqunZhang@lbl.gov>
Date: Sat, 17 Aug 2024 21:11:00 -0500
Subject: [PATCH 9/9] Multi-level Hypre: Fix bugs (#4089)

The stencil was wrong at the corner where the coarse/fine boundary meets
the domain boundary.
---
 Src/Extern/HYPRE/AMReX_HypreMLABecLap.H      |   3 +
 Src/Extern/HYPRE/AMReX_HypreMLABecLap.cpp    |  57 +++--
 Src/Extern/HYPRE/AMReX_HypreMLABecLap_2D_K.H |  34 ++-
 Src/Extern/HYPRE/AMReX_HypreMLABecLap_3D_K.H | 224 +++++++++++--------
 Src/Extern/HYPRE/AMReX_HypreMLABecLap_K.H    |   8 +-
 Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H   |  33 ++-
 Src/LinearSolvers/MLMG/AMReX_MLLinOp.H       |   2 +
 Src/LinearSolvers/MLMG/AMReX_MLMG.H          |  12 +
 8 files changed, 250 insertions(+), 123 deletions(-)

diff --git a/Src/Extern/HYPRE/AMReX_HypreMLABecLap.H b/Src/Extern/HYPRE/AMReX_HypreMLABecLap.H
index 04147207d3..6f687766b8 100644
--- a/Src/Extern/HYPRE/AMReX_HypreMLABecLap.H
+++ b/Src/Extern/HYPRE/AMReX_HypreMLABecLap.H
@@ -39,6 +39,7 @@ public:
 
     void setVerbose (int v) { m_verbose = v; }
     void setMaxIter (int v) { m_maxiter = v; }
+    void setIsSingular (bool v) { m_is_singular = v; }
 
     void setup (Real a_ascalar, Real a_bscalar,
                 Vector<MultiFab const*> const& a_acoefs,
@@ -65,6 +66,7 @@ private:
 
     int m_verbose = 0;
     int m_maxiter = 200;
+    bool m_is_singular = false;
 
     Vector<Geometry> m_geom;
     Vector<BoxArray> m_grids;
@@ -87,6 +89,7 @@ private:
     Vector<std::unique_ptr<MLMGBndry>> m_bndry;
     Vector<std::unique_ptr<BndryRegister>> m_bndry_rhs;
     Vector<iMultiFab> m_fine_masks;
+    Vector<iMultiFab> m_crse_masks;
 
     // For coarse cells at coarse/fine interface. The vector is for AMR
     // levels.
diff --git a/Src/Extern/HYPRE/AMReX_HypreMLABecLap.cpp b/Src/Extern/HYPRE/AMReX_HypreMLABecLap.cpp
index d7621c6bd0..b726f307bb 100644
--- a/Src/Extern/HYPRE/AMReX_HypreMLABecLap.cpp
+++ b/Src/Extern/HYPRE/AMReX_HypreMLABecLap.cpp
@@ -57,11 +57,16 @@ HypreMLABecLap::HypreMLABecLap (Vector<Geometry> a_geom,
     }
 
     m_fine_masks.resize(m_nlevels-1);
+    m_crse_masks.resize(m_nlevels-1);
     for (int ilev = 0; ilev < m_nlevels-1; ++ilev) {
         m_fine_masks[ilev] = amrex::makeFineMask(m_grids[ilev], m_dmap[ilev], IntVect(1),
                                                  m_grids[ilev+1], m_ref_ratio[ilev],
                                                  m_geom[ilev].periodicity(),
                                                  0, 1);
+        m_crse_masks[ilev].define(m_grids[ilev], m_dmap[ilev], 1, 1);
+        m_crse_masks[ilev].BuildMask(m_geom[ilev].Domain(),
+                                     m_geom[ilev].periodicity(),
+                                     1, 0, 0, 1);
     }
 
     m_c2f_offset_from.resize(m_nlevels-1);
@@ -588,12 +593,19 @@ void HypreMLABecLap::setup (Real a_ascalar, Real a_bscalar,
             const auto boxlo = amrex::lbound(vbx);
             const auto boxhi = amrex::ubound(vbx);
             // Set up stencil part of the matrix
+            auto fixed_pt = IntVect::TheMaxVector();
+            if (m_is_singular && m_nlevels-1 == ilev) {
+                auto const& box0 = m_grids.back()[0];
+                fixed_pt = box0.smallEnd() + 1;
+                // This cell does not have any non-stencil entries. So it's
+                // a good point for fixing singularity.
+            }
             amrex::fill(matfab,
             [=] AMREX_GPU_HOST_DEVICE (GpuArray<Real,stencil_size>& sten,
                                        int i, int j, int k)
             {
                 hypmlabeclap_mat(sten, i, j, k, boxlo, boxhi, sa, afab, sb, dx, bfabs,
-                                 bctype, bcl, bcmsk, bcval, bcrhs, ilev);
+                                 bctype, bcl, bcmsk, bcval, bcrhs, ilev, fixed_pt);
             });
 
             bool need_sync = true;
@@ -636,6 +648,7 @@ void HypreMLABecLap::setup (Real a_ascalar, Real a_bscalar,
                 auto const& c2f_offset_to_a = m_c2f_offset_to[ilev].const_array(mfi);
                 auto const& mat_a = matfab.array();
                 auto const& fine_mask = m_fine_masks[ilev].const_array(mfi);
+                auto const& crse_mask = m_crse_masks[ilev].const_array(mfi);
                 AMREX_D_TERM(auto offset_bx_a = m_offset_cf_bcoefs[ilev][0].isDefined()
                                               ? m_offset_cf_bcoefs[ilev][0].const_array(mfi)
                                               : Array4<int const>{};,
@@ -664,7 +677,7 @@ void HypreMLABecLap::setup (Real a_ascalar, Real a_bscalar,
                                      c2f_offset_to_a, dx, sb,
                                      AMREX_D_DECL(offset_bx_a,offset_by_a,offset_bz_a),
                                      AMREX_D_DECL(p_bx, p_by, p_bz),
-                                     fine_mask,rr);
+                                     fine_mask,rr, crse_mask);
                 });
                 if (c2f_total_from > 0) {
 #ifdef AMREX_USE_GPU
@@ -838,8 +851,8 @@ void HypreMLABecLap::setup (Real a_ascalar, Real a_bscalar,
         HYPRE_SStructSSAMGSetNumPostRelax(m_ss_solver, 4);
         HYPRE_SStructSSAMGSetNumCoarseRelax(m_ss_solver, 4);
 
-        HYPRE_SStructSSAMGSetLogging(m_ss_solver, m_verbose);
-        HYPRE_SStructSSAMGSetPrintLevel(m_ss_solver, m_verbose);
+        HYPRE_SStructSSAMGSetLogging(m_ss_solver, 1);
+        // HYPRE_SStructSSAMGSetPrintLevel(m_ss_solver, 1); /* 0: no, 1: setup, 2: solve, 3:both
 
 //        HYPRE_SStructSSAMGSetup(m_ss_solver, A, b, x);
 
@@ -854,15 +867,15 @@ void HypreMLABecLap::setup (Real a_ascalar, Real a_bscalar,
         HYPRE_BoomerAMGCreate(&m_solver);
 
         HYPRE_BoomerAMGSetOldDefault(m_solver); // Falgout coarsening with modified classical interpolation
-        HYPRE_BoomerAMGSetStrongThreshold(m_solver, (AMREX_SPACEDIM == 3) ? 0.6 : 0.25); // default is 0.25
+        HYPRE_BoomerAMGSetStrongThreshold(m_solver, (AMREX_SPACEDIM == 3) ? 0.4 : 0.25); // default is 0.25
         HYPRE_BoomerAMGSetRelaxOrder(m_solver, 1);   /* 0: default, natural order, 1: C/F relaxation order */
         HYPRE_BoomerAMGSetNumSweeps(m_solver, 2);   /* Sweeps on fine levels */
         // HYPRE_BoomerAMGSetFCycle(m_solver, 1); // default is 0
         // HYPRE_BoomerAMGSetCoarsenType(m_solver, 6);
         // HYPRE_BoomerAMGSetRelaxType(m_solver, 6);   /* G-S/Jacobi hybrid relaxation */
 
-        HYPRE_BoomerAMGSetLogging(m_solver, m_verbose);
-        HYPRE_BoomerAMGSetPrintLevel(m_solver, m_verbose);
+        HYPRE_BoomerAMGSetLogging(m_solver, 1);
+        // HYPRE_BoomerAMGSetPrintLevel(m_solver, 1); /* 0: no, 1: setup, 2: solve, 3:both
 
         HYPRE_ParCSRMatrix par_A;
         HYPRE_SStructMatrixGetObject(m_ss_A, (void**) &par_A);
@@ -956,6 +969,9 @@ void HypreMLABecLap::solve (Vector<MultiFab*> const& a_sol, Vector<MultiFab cons
             amrex::Abort("HypreMLABecLap::solve: TODO abstol > 0");
         }
 
+        HYPRE_Int num_iterations;
+        Real final_res;
+
 #ifdef AMREX_FEATURE_HYPRE_SSAMG
         if (m_hypre_solver_id == HypreSolverID::SSAMG)
         {
@@ -965,15 +981,13 @@ void HypreMLABecLap::solve (Vector<MultiFab*> const& a_sol, Vector<MultiFab cons
 
             HYPRE_SStructSSAMGSolve(m_ss_solver, m_ss_A, m_ss_b, m_ss_x);
 
-            if (m_verbose) {
-                HYPRE_Int num_iterations;
-                Real res;
-                HYPRE_SStructSSAMGGetNumIterations(m_ss_solver, &num_iterations);
-                HYPRE_SStructSSAMGGetFinalRelativeResidualNorm(m_ss_solver, &res);
+            HYPRE_SStructSSAMGGetNumIterations(m_ss_solver, &num_iterations);
+            HYPRE_SStructSSAMGGetFinalRelativeResidualNorm(m_ss_solver, &final_res);
 
+            if (m_verbose) {
                 amrex::Print() << "\n" << num_iterations
                                << " Hypre SSAMG Iterations, Relative Residual "
-                               << res << '\n';
+                               << final_res << '\n';
             }
         } else
 #endif
@@ -990,17 +1004,20 @@ void HypreMLABecLap::solve (Vector<MultiFab*> const& a_sol, Vector<MultiFab cons
 
             HYPRE_BoomerAMGSolve(m_solver, par_A, par_b, par_x);
 
-            if (m_verbose) {
-                HYPRE_Int num_iterations;
-                Real res;
-                HYPRE_BoomerAMGGetNumIterations(m_solver, &num_iterations);
-                HYPRE_BoomerAMGGetFinalRelativeResidualNorm(m_solver, &res);
+            HYPRE_BoomerAMGGetNumIterations(m_solver, &num_iterations);
+            HYPRE_BoomerAMGGetFinalRelativeResidualNorm(m_solver, &final_res);
 
+            if (m_verbose) {
                 amrex::Print() << "\n" << num_iterations
                                << " Hypre SS BoomerAMG Iterations, Relative Residual "
-                               << res << '\n';
+                               << final_res << '\n';
             }
         }
+
+        if (final_res > reltol) {
+            amrex::Abort("Hypre failed to converge after "+std::to_string(num_iterations)+
+                         " iterations. Final relative residual is "+std::to_string(final_res));
+        }
     }
 
     // Get solution
@@ -1044,8 +1061,6 @@ void HypreMLABecLap::solve (Vector<MultiFab*> const& a_sol, Vector<MultiFab cons
     for (int ilev = m_nlevels-2; ilev >= 0; --ilev) {
         amrex::average_down(*a_sol[ilev+1], *a_sol[ilev], 0, ncomp, m_ref_ratio[ilev]);
     }
-
-    // xxxxx abort if convergence is not reached.
 }
 
 #ifdef AMREX_USE_GPU
diff --git a/Src/Extern/HYPRE/AMReX_HypreMLABecLap_2D_K.H b/Src/Extern/HYPRE/AMReX_HypreMLABecLap_2D_K.H
index 57a37f19bf..7d083e7d98 100644
--- a/Src/Extern/HYPRE/AMReX_HypreMLABecLap_2D_K.H
+++ b/Src/Extern/HYPRE/AMReX_HypreMLABecLap_2D_K.H
@@ -109,7 +109,7 @@ void hypmlabeclap_c2f (int i, int j, int k,
                        Array4<int const> const& offset_by,
                        Real const* bx, Real const* by,
                        Array4<int const> const& fine_mask,
-                       IntVect const& rr)
+                       IntVect const& rr, Array4<int const> const& crse_mask)
 {
     if (fine_mask(i,j,k)) {
         // Let's set off-diagonal elements to zero
@@ -159,9 +159,13 @@ void hypmlabeclap_c2f (int i, int j, int k,
                 Real xInt = Real(-0.5) + (irx+Real(0.5))/Real(rr[0]);
                 Real xc[3] = {Real(-1.0), Real(0.0), Real(1.0)};
                 Real ct[3] = {Real(0.0), Real(0.0), Real(0.0)};
-                if (fine_mask(i-1,j,k)) {
+                if ( fine_mask(i-1,j,k) ||
+                    !crse_mask(i-1,j,k))
+                {
                     poly_interp_coeff<2>(xInt, &(xc[1]), &(ct[1]));
-                } else if (fine_mask(i+1,j,k)) {
+                } else if ( fine_mask(i+1,j,k) ||
+                           !crse_mask(i+1,j,k))
+                {
                     poly_interp_coeff<2>(xInt, xc, ct);
                 } else {
                     poly_interp_coeff<3>(xInt, xc, ct);
@@ -202,9 +206,13 @@ void hypmlabeclap_c2f (int i, int j, int k,
                 Real yInt = Real(-0.5) + (iry+Real(0.5))/Real(rr[1]);
                 Real yc[3] = {Real(-1.0), Real(0.0), Real(1.0)};
                 Real ct[3] = {Real(0.0), Real(0.0), Real(0.0)};
-                if (fine_mask(i,j-1,k)) {
+                if ( fine_mask(i,j-1,k) ||
+                    !crse_mask(i,j-1,k))
+                {
                     poly_interp_coeff<2>(yInt, &(yc[1]), &(ct[1]));
-                } else if (fine_mask(i,j+1,k)) {
+                } else if ( fine_mask(i,j+1,k) ||
+                           !crse_mask(i,j+1,k))
+                {
                     poly_interp_coeff<2>(yInt, yc, ct);
                 } else {
                     poly_interp_coeff<3>(yInt, yc, ct);
@@ -244,9 +252,13 @@ void hypmlabeclap_c2f (int i, int j, int k,
                 Real yInt = Real(-0.5) + (iry+Real(0.5))/Real(rr[1]);
                 Real yc[3] = {Real(-1.0), Real(0.0), Real(1.0)};
                 Real ct[3] = {Real(0.0), Real(0.0), Real(0.0)};
-                if (fine_mask(i,j-1,k)) {
+                if ( fine_mask(i,j-1,k) ||
+                    !crse_mask(i,j-1,k))
+                {
                     poly_interp_coeff<2>(yInt, &(yc[1]), &(ct[1]));
-                } else if (fine_mask(i,j+1,k)) {
+                } else if ( fine_mask(i,j+1,k) ||
+                           !crse_mask(i,j+1,k))
+                {
                     poly_interp_coeff<2>(yInt, yc, ct);
                 } else {
                     poly_interp_coeff<3>(yInt, yc, ct);
@@ -286,9 +298,13 @@ void hypmlabeclap_c2f (int i, int j, int k,
                 Real xInt = Real(-0.5) + (irx+Real(0.5))/Real(rr[0]);
                 Real xc[3] = {Real(-1.0), Real(0.0), Real(1.0)};
                 Real ct[3] = {Real(0.0), Real(0.0), Real(0.0)};
-                if (fine_mask(i-1,j,k)) {
+                if ( fine_mask(i-1,j,k) ||
+                    !crse_mask(i-1,j,k))
+                {
                     poly_interp_coeff<2>(xInt, &(xc[1]), &(ct[1]));
-                } else if (fine_mask(i+1,j,k)) {
+                } else if ( fine_mask(i+1,j,k) ||
+                           !crse_mask(i+1,j,k))
+                {
                     poly_interp_coeff<2>(xInt, xc, ct);
                 } else {
                     poly_interp_coeff<3>(xInt, xc, ct);
diff --git a/Src/Extern/HYPRE/AMReX_HypreMLABecLap_3D_K.H b/Src/Extern/HYPRE/AMReX_HypreMLABecLap_3D_K.H
index 8e6e1a39b1..431650236f 100644
--- a/Src/Extern/HYPRE/AMReX_HypreMLABecLap_3D_K.H
+++ b/Src/Extern/HYPRE/AMReX_HypreMLABecLap_3D_K.H
@@ -166,7 +166,7 @@ void hypmlabeclap_c2f (int i, int j, int k,
                        Array4<int const> const& offset_bz,
                        Real const* bx, Real const* by, Real const* bz,
                        Array4<int const> const& fine_mask,
-                       IntVect const& rr)
+                       IntVect const& rr, Array4<int const> const& crse_mask)
 {
     if (fine_mask(i,j,k)) {
         // Let's set off-diagonal elements to zero
@@ -191,7 +191,11 @@ void hypmlabeclap_c2f (int i, int j, int k,
             (! fine_mask(i,j-1,k-1)) &&
             (! fine_mask(i,j+1,k-1)) &&
             (! fine_mask(i,j-1,k+1)) &&
-            (! fine_mask(i,j+1,k+1)))
+            (! fine_mask(i,j+1,k+1)) &&
+            (  crse_mask(i,j-1,k-1)) &&
+            (  crse_mask(i,j+1,k-1)) &&
+            (  crse_mask(i,j-1,k+1)) &&
+            (  crse_mask(i,j+1,k+1)))
         {
             corner[0] = true;
         }
@@ -199,7 +203,11 @@ void hypmlabeclap_c2f (int i, int j, int k,
             (! fine_mask(i-1,j,k-1)) &&
             (! fine_mask(i+1,j,k-1)) &&
             (! fine_mask(i-1,j,k+1)) &&
-            (! fine_mask(i+1,j,k+1)))
+            (! fine_mask(i+1,j,k+1)) &&
+            (  crse_mask(i-1,j,k-1)) &&
+            (  crse_mask(i+1,j,k-1)) &&
+            (  crse_mask(i-1,j,k+1)) &&
+            (  crse_mask(i+1,j,k+1)))
         {
             corner[1] = true;
         }
@@ -207,7 +215,11 @@ void hypmlabeclap_c2f (int i, int j, int k,
             (! fine_mask(i-1,j-1,k)) &&
             (! fine_mask(i+1,j-1,k)) &&
             (! fine_mask(i-1,j+1,k)) &&
-            (! fine_mask(i+1,j+1,k)))
+            (! fine_mask(i+1,j+1,k)) &&
+            (  crse_mask(i-1,j-1,k)) &&
+            (  crse_mask(i+1,j-1,k)) &&
+            (  crse_mask(i-1,j+1,k)) &&
+            (  crse_mask(i+1,j+1,k)))
         {
             corner[2] = true;
         }
@@ -253,28 +265,34 @@ void hypmlabeclap_c2f (int i, int j, int k,
                 Real fac0 = fac*cc[0];
                 Real s0 = Real(1.0);
 
-                if (!fine_mask(i-1,j,k) && !fine_mask(i+1,j,k)) {
-                    s0 -= x*x;
-                    stencil(i,j,k)[1] += fac0*Real(0.5)*x*(x-Real(1.0));
-                    stencil(i,j,k)[2] += fac0*Real(0.5)*x*(x+Real(1.0));
-                } else if (!fine_mask(i-1,j,k)) {
+                if ( fine_mask(i-1,j,k) ||
+                    !crse_mask(i-1,j,k))
+                {
+                    s0 += Real(-0.5)*x;
+                    stencil(i,j,k)[2] += fac0*Real(0.5)*x;
+                } else if ( fine_mask(i+1,j,k) ||
+                           !crse_mask(i+1,j,k)) {
                     s0 += Real(0.5)*x;
                     stencil(i,j,k)[1] += fac0*Real(-0.5)*x;
                 } else {
-                    s0 += Real(-0.5)*x;
-                    stencil(i,j,k)[2] += fac0*Real(0.5)*x;
+                    s0 -= x*x;
+                    stencil(i,j,k)[1] += fac0*Real(0.5)*x*(x-Real(1.0));
+                    stencil(i,j,k)[2] += fac0*Real(0.5)*x*(x+Real(1.0));
                 }
 
-                if (!fine_mask(i,j-1,k) && !fine_mask(i,j+1,k)) {
-                    s0 -= y*y;
-                    stencil(i,j,k)[3] += fac0*Real(0.5)*y*(y-Real(1.0));
-                    stencil(i,j,k)[4] += fac0*Real(0.5)*y*(y+Real(1.0));
-                } else if (!fine_mask(i,j-1,k)) {
+                if ( fine_mask(i,j-1,k) ||
+                    !crse_mask(i,j-1,k))
+                {
+                    s0 += Real(-0.5)*y;
+                    stencil(i,j,k)[4] += fac0*Real(0.5)*y;
+                } else if ( fine_mask(i,j+1,k)  ||
+                           !crse_mask(i,j+1,k)) {
                     s0 += Real(0.5)*y;
                     stencil(i,j,k)[3] += fac0*Real(-0.5)*y;
                 } else {
-                    s0 += Real(-0.5)*y;
-                    stencil(i,j,k)[4] += fac0*Real(0.5)*y;
+                    s0 -= y*y;
+                    stencil(i,j,k)[3] += fac0*Real(0.5)*y*(y-Real(1.0));
+                    stencil(i,j,k)[4] += fac0*Real(0.5)*y*(y+Real(1.0));
                 }
 
                 stencil(i,j,k)[0] += fac0*s0;
@@ -322,28 +340,34 @@ void hypmlabeclap_c2f (int i, int j, int k,
                 Real fac0 = fac*cc[0];
                 Real s0 = Real(1.0);
 
-                if (!fine_mask(i-1,j,k) && !fine_mask(i+1,j,k)) {
-                    s0 -= x*x;
-                    stencil(i,j,k)[1] += fac0*Real(0.5)*x*(x-Real(1.0));
-                    stencil(i,j,k)[2] += fac0*Real(0.5)*x*(x+Real(1.0));
-                } else if (!fine_mask(i-1,j,k)) {
+                if ( fine_mask(i-1,j,k) ||
+                    !crse_mask(i-1,j,k))
+                {
+                    s0 += Real(-0.5)*x;
+                    stencil(i,j,k)[2] += fac0*Real(0.5)*x;
+                } else if ( fine_mask(i+1,j,k) ||
+                           !crse_mask(i+1,j,k)) {
                     s0 += Real(0.5)*x;
                     stencil(i,j,k)[1] += fac0*Real(-0.5)*x;
                 } else {
-                    s0 += Real(-0.5)*x;
-                    stencil(i,j,k)[2] += fac0*Real(0.5)*x;
+                    s0 -= x*x;
+                    stencil(i,j,k)[1] += fac0*Real(0.5)*x*(x-Real(1.0));
+                    stencil(i,j,k)[2] += fac0*Real(0.5)*x*(x+Real(1.0));
                 }
 
-                if (!fine_mask(i,j,k-1) && !fine_mask(i,j,k+1)) {
-                    s0 -= z*z;
-                    stencil(i,j,k)[5] += fac0*Real(0.5)*z*(z-Real(1.0));
-                    stencil(i,j,k)[6] += fac0*Real(0.5)*z*(z+Real(1.0));
-                } else if (!fine_mask(i,j,k-1)) {
+                if ( fine_mask(i,j,k-1) ||
+                    !crse_mask(i,j,k-1))
+                {
+                    s0 += Real(-0.5)*z;
+                    stencil(i,j,k)[6] += fac0*Real(0.5)*z;
+                } else if ( fine_mask(i,j,k+1)  ||
+                           !crse_mask(i,j,k+1)) {
                     s0 += Real(0.5)*z;
                     stencil(i,j,k)[5] += fac0*Real(-0.5)*z;
                 } else {
-                    s0 += Real(-0.5)*z;
-                    stencil(i,j,k)[6] += fac0*Real(0.5)*z;
+                    s0 -= z*z;
+                    stencil(i,j,k)[5] += fac0*Real(0.5)*z*(z-Real(1.0));
+                    stencil(i,j,k)[6] += fac0*Real(0.5)*z*(z+Real(1.0));
                 }
 
                 stencil(i,j,k)[0] += fac0*s0;
@@ -393,28 +417,34 @@ void hypmlabeclap_c2f (int i, int j, int k,
                 Real fac0 = fac*cc[0];
                 Real s0 = Real(1.0);
 
-                if (!fine_mask(i,j-1,k) && !fine_mask(i,j+1,k)) {
-                    s0 -= y*y;
-                    stencil(i,j,k)[3] += fac0*Real(0.5)*y*(y-Real(1.0));
-                    stencil(i,j,k)[4] += fac0*Real(0.5)*y*(y+Real(1.0));
-                } else if (!fine_mask(i,j-1,k)) {
+                if ( fine_mask(i,j-1,k) ||
+                    !crse_mask(i,j-1,k))
+                {
+                    s0 += Real(-0.5)*y;
+                    stencil(i,j,k)[4] += fac0*Real(0.5)*y;
+                } else if ( fine_mask(i,j+1,k) ||
+                           !crse_mask(i,j+1,k)) {
                     s0 += Real(0.5)*y;
                     stencil(i,j,k)[3] += fac0*Real(-0.5)*y;
                 } else {
-                    s0 += Real(-0.5)*y;
-                    stencil(i,j,k)[4] += fac0*Real(0.5)*y;
+                    s0 -= y*y;
+                    stencil(i,j,k)[3] += fac0*Real(0.5)*y*(y-Real(1.0));
+                    stencil(i,j,k)[4] += fac0*Real(0.5)*y*(y+Real(1.0));
                 }
 
-                if (!fine_mask(i,j,k-1) && !fine_mask(i,j,k+1)) {
-                    s0 -= z*z;
-                    stencil(i,j,k)[5] += fac0*Real(0.5)*z*(z-Real(1.0));
-                    stencil(i,j,k)[6] += fac0*Real(0.5)*z*(z+Real(1.0));
-                } else if (!fine_mask(i,j,k-1)) {
+                if ( fine_mask(i,j,k-1) ||
+                    !crse_mask(i,j,k-1))
+                {
+                    s0 += Real(-0.5)*z;
+                    stencil(i,j,k)[6] += fac0*Real(0.5)*z;
+                } else if ( fine_mask(i,j,k+1) ||
+                           !crse_mask(i,j,k+1)) {
                     s0 += Real(0.5)*z;
                     stencil(i,j,k)[5] += fac0*Real(-0.5)*z;
                 } else {
-                    s0 += Real(-0.5)*z;
-                    stencil(i,j,k)[6] += fac0*Real(0.5)*z;
+                    s0 -= z*z;
+                    stencil(i,j,k)[5] += fac0*Real(0.5)*z*(z-Real(1.0));
+                    stencil(i,j,k)[6] += fac0*Real(0.5)*z*(z+Real(1.0));
                 }
 
                 stencil(i,j,k)[0] += fac0*s0;
@@ -463,28 +493,34 @@ void hypmlabeclap_c2f (int i, int j, int k,
                 Real fac0 = fac*cc[0];
                 Real s0 = Real(1.0);
 
-                if (!fine_mask(i,j-1,k) && !fine_mask(i,j+1,k)) {
-                    s0 -= y*y;
-                    stencil(i,j,k)[3] += fac0*Real(0.5)*y*(y-Real(1.0));
-                    stencil(i,j,k)[4] += fac0*Real(0.5)*y*(y+Real(1.0));
-                } else if (!fine_mask(i,j-1,k)) {
+                if ( fine_mask(i,j-1,k) ||
+                    !crse_mask(i,j-1,k))
+                {
+                    s0 += Real(-0.5)*y;
+                    stencil(i,j,k)[4] += fac0*Real(0.5)*y;
+                } else if ( fine_mask(i,j+1,k)  ||
+                           !crse_mask(i,j+1,k)) {
                     s0 += Real(0.5)*y;
                     stencil(i,j,k)[3] += fac0*Real(-0.5)*y;
                 } else {
-                    s0 += Real(-0.5)*y;
-                    stencil(i,j,k)[4] += fac0*Real(0.5)*y;
+                    s0 -= y*y;
+                    stencil(i,j,k)[3] += fac0*Real(0.5)*y*(y-Real(1.0));
+                    stencil(i,j,k)[4] += fac0*Real(0.5)*y*(y+Real(1.0));
                 }
 
-                if (!fine_mask(i,j,k-1) && !fine_mask(i,j,k+1)) {
-                    s0 -= z*z;
-                    stencil(i,j,k)[5] += fac0*Real(0.5)*z*(z-Real(1.0));
-                    stencil(i,j,k)[6] += fac0*Real(0.5)*z*(z+Real(1.0));
-                } else if (!fine_mask(i,j,k-1)) {
+                if ( fine_mask(i,j,k-1) ||
+                    !crse_mask(i,j,k-1))
+                {
+                    s0 += Real(-0.5)*z;
+                    stencil(i,j,k)[6] += fac0*Real(0.5)*z;
+                } else if ( fine_mask(i,j,k+1) ||
+                           !crse_mask(i,j,k+1)) {
                     s0 += Real(0.5)*z;
                     stencil(i,j,k)[5] += fac0*Real(-0.5)*z;
                 } else {
-                    s0 += Real(-0.5)*z;
-                    stencil(i,j,k)[6] += fac0*Real(0.5)*z;
+                    s0 -= z*z;
+                    stencil(i,j,k)[5] += fac0*Real(0.5)*z*(z-Real(1.0));
+                    stencil(i,j,k)[6] += fac0*Real(0.5)*z*(z+Real(1.0));
                 }
 
                 stencil(i,j,k)[0] += fac0*s0;
@@ -534,28 +570,34 @@ void hypmlabeclap_c2f (int i, int j, int k,
                 Real fac0 = fac*cc[0];
                 Real s0 = Real(1.0);
 
-                if (!fine_mask(i-1,j,k) && !fine_mask(i+1,j,k)) {
-                    s0 -= x*x;
-                    stencil(i,j,k)[1] += fac0*Real(0.5)*x*(x-Real(1.0));
-                    stencil(i,j,k)[2] += fac0*Real(0.5)*x*(x+Real(1.0));
-                } else if (!fine_mask(i-1,j,k)) {
+                if ( fine_mask(i-1,j,k) ||
+                    !crse_mask(i-1,j,k))
+                {
+                    s0 += Real(-0.5)*x;
+                    stencil(i,j,k)[2] += fac0*Real(0.5)*x;
+                } else if ( fine_mask(i+1,j,k) ||
+                           !crse_mask(i+1,j,k)) {
                     s0 += Real(0.5)*x;
                     stencil(i,j,k)[1] += fac0*Real(-0.5)*x;
                 } else {
-                    s0 += Real(-0.5)*x;
-                    stencil(i,j,k)[2] += fac0*Real(0.5)*x;
+                    s0 -= x*x;
+                    stencil(i,j,k)[1] += fac0*Real(0.5)*x*(x-Real(1.0));
+                    stencil(i,j,k)[2] += fac0*Real(0.5)*x*(x+Real(1.0));
                 }
 
-                if (!fine_mask(i,j,k-1) && !fine_mask(i,j,k+1)) {
-                    s0 -= z*z;
-                    stencil(i,j,k)[5] += fac0*Real(0.5)*z*(z-Real(1.0));
-                    stencil(i,j,k)[6] += fac0*Real(0.5)*z*(z+Real(1.0));
-                } else if (!fine_mask(i,j,k-1)) {
+                if ( fine_mask(i,j,k-1) ||
+                    !crse_mask(i,j,k-1))
+                {
+                    s0 += Real(-0.5)*z;
+                    stencil(i,j,k)[6] += fac0*Real(0.5)*z;
+                } else if ( fine_mask(i,j,k+1) ||
+                           !crse_mask(i,j,k+1)) {
                     s0 += Real(0.5)*z;
                     stencil(i,j,k)[5] += fac0*Real(-0.5)*z;
                 } else {
-                    s0 += Real(-0.5)*z;
-                    stencil(i,j,k)[6] += fac0*Real(0.5)*z;
+                    s0 -= z*z;
+                    stencil(i,j,k)[5] += fac0*Real(0.5)*z*(z-Real(1.0));
+                    stencil(i,j,k)[6] += fac0*Real(0.5)*z*(z+Real(1.0));
                 }
 
                 stencil(i,j,k)[0] += fac0*s0;
@@ -605,28 +647,34 @@ void hypmlabeclap_c2f (int i, int j, int k,
                 Real fac0 = fac*cc[0];
                 Real s0 = Real(1.0);
 
-                if (!fine_mask(i-1,j,k) && !fine_mask(i+1,j,k)) {
-                    s0 -= x*x;
-                    stencil(i,j,k)[1] += fac0*Real(0.5)*x*(x-Real(1.0));
-                    stencil(i,j,k)[2] += fac0*Real(0.5)*x*(x+Real(1.0));
-                } else if (!fine_mask(i-1,j,k)) {
+                if ( fine_mask(i-1,j,k) ||
+                    !crse_mask(i-1,j,k))
+                {
+                    s0 += Real(-0.5)*x;
+                    stencil(i,j,k)[2] += fac0*Real(0.5)*x;
+                } else if ( fine_mask(i+1,j,k) ||
+                           !crse_mask(i+1,j,k)) {
                     s0 += Real(0.5)*x;
                     stencil(i,j,k)[1] += fac0*Real(-0.5)*x;
                 } else {
-                    s0 += Real(-0.5)*x;
-                    stencil(i,j,k)[2] += fac0*Real(0.5)*x;
+                    s0 -= x*x;
+                    stencil(i,j,k)[1] += fac0*Real(0.5)*x*(x-Real(1.0));
+                    stencil(i,j,k)[2] += fac0*Real(0.5)*x*(x+Real(1.0));
                 }
 
-                if (!fine_mask(i,j-1,k) && !fine_mask(i,j+1,k)) {
-                    s0 -= y*y;
-                    stencil(i,j,k)[3] += fac0*Real(0.5)*y*(y-Real(1.0));
-                    stencil(i,j,k)[4] += fac0*Real(0.5)*y*(y+Real(1.0));
-                } else if (!fine_mask(i,j-1,k)) {
+                if ( fine_mask(i,j-1,k) ||
+                    !crse_mask(i,j-1,k))
+                {
+                    s0 += Real(-0.5)*y;
+                    stencil(i,j,k)[4] += fac0*Real(0.5)*y;
+                } else if ( fine_mask(i,j+1,k) ||
+                           !crse_mask(i,j+1,k)) {
                     s0 += Real(0.5)*y;
                     stencil(i,j,k)[3] += fac0*Real(-0.5)*y;
                 } else {
-                    s0 += Real(-0.5)*y;
-                    stencil(i,j,k)[4] += fac0*Real(0.5)*y;
+                    s0 -= y*y;
+                    stencil(i,j,k)[3] += fac0*Real(0.5)*y*(y-Real(1.0));
+                    stencil(i,j,k)[4] += fac0*Real(0.5)*y*(y+Real(1.0));
                 }
 
                 stencil(i,j,k)[0] += fac0*s0;
diff --git a/Src/Extern/HYPRE/AMReX_HypreMLABecLap_K.H b/Src/Extern/HYPRE/AMReX_HypreMLABecLap_K.H
index ea38bf5037..129a6a989a 100644
--- a/Src/Extern/HYPRE/AMReX_HypreMLABecLap_K.H
+++ b/Src/Extern/HYPRE/AMReX_HypreMLABecLap_K.H
@@ -22,7 +22,7 @@ void hypmlabeclap_mat (GpuArray<Real,2*AMREX_SPACEDIM+1>& sten, int i, int j, in
                        GpuArray<Array4<int const>, AMREX_SPACEDIM*2> const& bcmsk,
                        GpuArray<Array4<Real const>, AMREX_SPACEDIM*2> const& bcval,
                        GpuArray<Array4<Real>, AMREX_SPACEDIM*2> const& bcrhs,
-                       int level)
+                       int level, IntVect const& fixed_pt)
 {
     Real bxm = b[0] ? b[0](i  ,j  ,k  ) : Real(1.0);
     Real bxp = b[0] ? b[0](i+1,j  ,k  ) : Real(1.0);
@@ -223,6 +223,12 @@ void hypmlabeclap_mat (GpuArray<Real,2*AMREX_SPACEDIM+1>& sten, int i, int j, in
     }
 
 #endif
+
+    if (fixed_pt == IntVect(AMREX_D_DECL(i,j,k))) {
+        for (int n = 1; n < 2*AMREX_SPACEDIM+1; ++n) {
+            sten[n] = Real(0.0);
+        }
+    }
 }
 
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H
index 536d4c82b0..783d17bbac 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H
+++ b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H
@@ -86,6 +86,8 @@ public:
     void solutionResidual (int amrlev, MF& resid, MF& x, const MF& b,
                                    const MF* crse_bcdata=nullptr) override;
 
+    void prepareForFluxes (int amrlev, const MF* crse_bcdata = nullptr) override;
+
     void correctionResidual (int amrlev, int mglev, MF& resid, MF& x, const MF& b,
                                      BCMode bc_mode, const MF* crse_bcdata=nullptr) final;
 
@@ -133,6 +135,10 @@ public:
 
     Vector<std::unique_ptr<MF> > m_robin_bcval;
 
+#ifdef AMREX_USE_HYPRE
+    void setInterpBndryHalfWidth (int w) { m_interpbndry_halfwidth = w; }
+#endif
+
 protected:
 
     bool m_has_metric_term = false;
@@ -202,6 +208,8 @@ private:
 
     void computeVolInv () const;
     mutable Vector<Vector<RT> > m_volinv; // used by solvability fix
+
+    int m_interpbndry_halfwidth = 2;
 };
 
 template <typename T>
@@ -472,7 +480,9 @@ MLCellLinOpT<MF>::defineBC ()
         bc_data.setVal(0.0);
 
         m_bndry_cor[amrlev]->setBndryValues(*m_crse_cor_br[amrlev], 0, bc_data, 0, 0, ncomp,
-                                            IntVect(this->m_amr_ref_ratio[amrlev-1]));
+                                            IntVect(this->m_amr_ref_ratio[amrlev-1]),
+                                            InterpBndryDataT<MF>::IBD_max_order_DEF,
+                                            m_interpbndry_halfwidth);
 
         Vector<Array<LinOpBCType,AMREX_SPACEDIM> > bclohi
             (ncomp,Array<LinOpBCType,AMREX_SPACEDIM>{{AMREX_D_DECL(BCType::Dirichlet,
@@ -544,7 +554,9 @@ MLCellLinOpT<MF>::setLevelBC (int amrlev, const MF* a_levelbcdata, const MF* rob
                 m_crse_sol_br[amrlev]->setVal(RT(0.0));
             }
             m_bndry_sol[amrlev]->setBndryValues(*m_crse_sol_br[amrlev], 0,
-                                                bcdata, 0, 0, ncomp, br_ref_ratio);
+                                                bcdata, 0, 0, ncomp, br_ref_ratio,
+                                                InterpBndryDataT<MF>::IBD_max_order_DEF,
+                                                m_interpbndry_halfwidth);
             br_ref_ratio = this->m_coarse_data_crse_ratio;
         }
         else
@@ -639,7 +651,9 @@ MLCellLinOpT<MF>::updateSolBC (int amrlev, const MF& crse_bcdata) const
     m_crse_sol_br[amrlev]->copyFrom(crse_bcdata, 0, 0, 0, ncomp,
                                     this->m_geom[amrlev-1][0].periodicity());
     m_bndry_sol[amrlev]->updateBndryValues(*m_crse_sol_br[amrlev], 0, 0, ncomp,
-                                           IntVect(this->m_amr_ref_ratio[amrlev-1]));
+                                           IntVect(this->m_amr_ref_ratio[amrlev-1]),
+                                           InterpBndryDataT<MF>::IBD_max_order_DEF,
+                                           m_interpbndry_halfwidth);
 }
 
 template <typename MF>
@@ -652,7 +666,9 @@ MLCellLinOpT<MF>::updateCorBC (int amrlev, const MF& crse_bcdata) const
     m_crse_cor_br[amrlev]->copyFrom(crse_bcdata, 0, 0, 0, ncomp,
                                     this->m_geom[amrlev-1][0].periodicity());
     m_bndry_cor[amrlev]->updateBndryValues(*m_crse_cor_br[amrlev], 0, 0, ncomp,
-                                           IntVect(this->m_amr_ref_ratio[amrlev-1]));
+                                           IntVect(this->m_amr_ref_ratio[amrlev-1]),
+                                           InterpBndryDataT<MF>::IBD_max_order_DEF,
+                                           m_interpbndry_halfwidth);
 }
 
 template <typename MF>
@@ -1210,6 +1226,15 @@ MLCellLinOpT<MF>::solutionResidual (int amrlev, MF& resid, MF& x, const MF& b,
     MF::Xpay(resid, RT(-1.0), b, 0, 0, ncomp, IntVect(0));
 }
 
+template <typename MF>
+void
+MLCellLinOpT<MF>::prepareForFluxes (int amrlev, const MF* crse_bcdata)
+{
+    if (crse_bcdata != nullptr) {
+        updateSolBC(amrlev, *crse_bcdata);
+    }
+}
+
 template <typename MF>
 void
 MLCellLinOpT<MF>::correctionResidual (int amrlev, int mglev, MF& resid, MF& x, const MF& b,
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H
index 03da0874e7..3cc623b761 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H
+++ b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H
@@ -376,6 +376,8 @@ public:
     virtual void solutionResidual (int amrlev, MF& resid, MF& x, const MF& b,
                                    const MF* crse_bcdata=nullptr) = 0;
 
+    virtual void prepareForFluxes (int /*amrlev*/, const MF* /*crse_bcdata*/ = nullptr) {}
+
     /**
      * \brief Compute residual for the residual-correction form, resid = b - L(x)
      *
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLMG.H b/Src/LinearSolvers/MLMG/AMReX_MLMG.H
index 5078383d5c..78b2ffdd3d 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLMG.H
+++ b/Src/LinearSolvers/MLMG/AMReX_MLMG.H
@@ -164,6 +164,8 @@ public:
     void setHypreStrongThreshold (Real t) noexcept {hypre_strong_threshold = t;}
 #endif
 
+    void prepareForFluxes (Vector<MF const*> const& a_sol);
+
     template <typename AMF>
     void prepareForSolve (Vector<AMF*> const& a_sol, Vector<AMF const*> const& a_rhs);
 
@@ -538,6 +540,16 @@ MLMGT<MF>::solve (const Vector<AMF*>& a_sol, const Vector<AMF const*>& a_rhs,
     return composite_norminf;
 }
 
+template <typename MF>
+void
+MLMGT<MF>::prepareForFluxes (Vector<MF const*> const& a_sol)
+{
+    for (int alev = finest_amr_lev; alev >= 0; --alev) {
+        const MF* crse_bcdata = (alev > 0) ? a_sol[alev-1] : nullptr;
+        linop.prepareForFluxes(alev, crse_bcdata);
+    }
+}
+
 template <typename MF>
 template <typename AMF>
 void