From 5dedac0b7d70c71569973a6204303a95085be4b4 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Tue, 13 Aug 2024 12:29:25 -0500 Subject: [PATCH 1/9] GPU Device Variable on Intel GPUs (#4056) This adds GPU device variable support on Intel GPUs using Intel oneAPI compiler's experimental feature. To make the user interface consistent, we have add a macro AMREX_DEVICE_GLOBAL_VARIABLE. For example, the user can define a device variable as follows for all GPUs and CPUs. AMREX_DEVICE_GLOBAL_VARIABLE(amrex::Real, my_dg1); // amrex::Real my_dg1; AMREX_DEVICE_GLOBAL_VARIABLE(amrex::Real, 4, my_dg2); // amrex::Real my_dg2[4]; Below are their declarations. extern AMREX_DEVICE_GLOBAL_VARIABLE(amrex::Real, my_dg1); extern AMREX_DEVICE_GLOBAL_VARIABLE(amrex::Real, 4, my_dg2); GPU and CPU kernels can use the global variables if they see the declarations. We have also added two functions from copying data from and to device global variables. //! Copy `nbytes` bytes from host to device global variable. `offset` is the //! offset in bytes from the start of the device global variable. template void memcpy_from_host_to_device_global_async (T& dg, const void* src, std::size_t nbytes, std::size_t offset = 0) //! Copy `nbytes` bytes from device global variable to host. `offset` is the //! offset in bytes from the start of the device global variable. template void memcpy_from_device_global_to_host_async (void* dst, T const& dg, std::size_t nbytes, std::size_t offset = 0) --- .github/workflows/intel.yml | 12 ++++++-- Src/Base/AMReX_GpuDevice.H | 47 ++++++++++++++++++++++++++++++ Src/Base/AMReX_GpuLaunch.nolint.H | 7 ++--- Src/Base/AMReX_GpuQualifiers.H | 18 ++++++++++++ Tests/CMakeLists.txt | 3 +- Tests/DeviceGlobal/CMakeLists.txt | 15 ++++++++++ Tests/DeviceGlobal/GNUmakefile | 23 +++++++++++++++ Tests/DeviceGlobal/Make.package | 2 ++ Tests/DeviceGlobal/global_vars.H | 7 +++++ Tests/DeviceGlobal/global_vars.cpp | 7 +++++ Tests/DeviceGlobal/init.cpp | 27 +++++++++++++++++ Tests/DeviceGlobal/main.cpp | 20 +++++++++++++ Tests/DeviceGlobal/work.cpp | 40 +++++++++++++++++++++++++ Tools/GNUMake/comps/dpcpp.mak | 8 +++++ 14 files changed, 229 insertions(+), 7 deletions(-) create mode 100644 Tests/DeviceGlobal/CMakeLists.txt create mode 100644 Tests/DeviceGlobal/GNUmakefile create mode 100644 Tests/DeviceGlobal/Make.package create mode 100644 Tests/DeviceGlobal/global_vars.H create mode 100644 Tests/DeviceGlobal/global_vars.cpp create mode 100644 Tests/DeviceGlobal/init.cpp create mode 100644 Tests/DeviceGlobal/main.cpp create mode 100644 Tests/DeviceGlobal/work.cpp diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml index 168670eda3..4d1612d0ec 100644 --- a/.github/workflows/intel.yml +++ b/.github/workflows/intel.yml @@ -24,7 +24,11 @@ jobs: restore-keys: | ccache-${{ github.workflow }}-${{ github.job }}-git- - name: Build & Install - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor"} + # /tmp/icpx-2d34de0e47/global_vars-header-4390fb.h:25:36: error: zero size arrays are an extension [-Werror,-Wzero-length-array] + # 25 | const char* const kernel_names[] = { + # | ^ + # 1 error generated. + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-zero-length-array"} run: | export CCACHE_COMPRESS=1 export CCACHE_COMPRESSLEVEL=10 @@ -68,7 +72,11 @@ jobs: restore-keys: | ccache-${{ github.workflow }}-${{ github.job }}-git- - name: Build & Install - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor"} + # /tmp/icpx-2d34de0e47/global_vars-header-4390fb.h:25:36: error: zero size arrays are an extension [-Werror,-Wzero-length-array] + # 25 | const char* const kernel_names[] = { + # | ^ + # 1 error generated. + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-zero-length-array"} run: | export CCACHE_COMPRESS=1 export CCACHE_COMPRESSLEVEL=10 diff --git a/Src/Base/AMReX_GpuDevice.H b/Src/Base/AMReX_GpuDevice.H index 7c17c918a7..a7aef5a924 100644 --- a/Src/Base/AMReX_GpuDevice.H +++ b/Src/Base/AMReX_GpuDevice.H @@ -14,6 +14,7 @@ #include #include #include +#include #include #define AMREX_GPU_MAX_STREAMS 8 @@ -318,6 +319,52 @@ dtod_memcpy (void* p_d_dst, const void* p_d_src, const std::size_t sz) noexcept void hypreSynchronize (); #endif +//! Copy `nbytes` bytes from host to device global variable. `offset` is the +//! offset in bytes from the start of the device global variable. +template +void memcpy_from_host_to_device_global_async (T& dg, const void* src, + std::size_t nbytes, + std::size_t offset = 0) +{ +#if defined(AMREX_USE_CUDA) + AMREX_CUDA_SAFE_CALL(cudaMemcpyToSymbolAsync(dg, src, nbytes, offset, + cudaMemcpyHostToDevice, + Device::gpuStream())); +#elif defined(AMREX_USE_HIP) + AMREX_HIP_SAFE_CALL(hipMemcpyToSymbolAsync(dg, src, nbytes, offset, + hipMemcpyHostToDevice, + Device::gpuStream())); +#elif defined(AMREX_USE_SYCL) + Device::streamQueue().memcpy(dg, src, nbytes, offset); +#else + auto* p = (char*)(&dg); + std::memcpy(p+offset, src, nbytes); +#endif +} + +//! Copy `nbytes` bytes from device global variable to host. `offset` is the +//! offset in bytes from the start of the device global variable. +template +void memcpy_from_device_global_to_host_async (void* dst, T const& dg, + std::size_t nbytes, + std::size_t offset = 0) +{ +#if defined(AMREX_USE_CUDA) + AMREX_CUDA_SAFE_CALL(cudaMemcpyFromSymbolAsync(dst, dg, nbytes, offset, + cudaMemcpyDeviceToHost, + Device::gpuStream())); +#elif defined(AMREX_USE_HIP) + AMREX_HIP_SAFE_CALL(hipMemcpyFromSymbolAsync(dst, dg, nbytes, offset, + hipMemcpyDeviceToHost, + Device::gpuStream())); +#elif defined(AMREX_USE_SYCL) + Device::streamQueue().memcpy(dst, dg, nbytes, offset); +#else + auto const* p = (char const*)(&dg); + std::memcpy(dst, p+offset, nbytes); +#endif +} + } #endif diff --git a/Src/Base/AMReX_GpuLaunch.nolint.H b/Src/Base/AMReX_GpuLaunch.nolint.H index c7df173751..bb1bbb2453 100644 --- a/Src/Base/AMReX_GpuLaunch.nolint.H +++ b/Src/Base/AMReX_GpuLaunch.nolint.H @@ -1,9 +1,8 @@ // Do not include this header anywhere other than AMReX_GpuLaunch.H. // The purpose of this file is to avoid clang-tidy. -#define AMREX_WRONG_NUM_ARGS(...) static_assert(false,"Wrong number of arguments to macro") -#define AMREX_GET_MACRO(_1,_2,_3,_4,_5,_6,_7,_8,_9,NAME,...) NAME -#define AMREX_LAUNCH_DEVICE_LAMBDA(...) AMREX_GET_MACRO(__VA_ARGS__,\ +#define AMREX_GET_LAUNCH_MACRO(_1,_2,_3,_4,_5,_6,_7,_8,_9,NAME,...) NAME +#define AMREX_LAUNCH_DEVICE_LAMBDA(...) AMREX_GET_LAUNCH_MACRO(__VA_ARGS__,\ AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_3, \ AMREX_WRONG_NUM_ARGS, \ AMREX_WRONG_NUM_ARGS, \ @@ -14,7 +13,7 @@ AMREX_WRONG_NUM_ARGS, \ AMREX_WRONG_NUM_ARGS)(__VA_ARGS__) -#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA(...) AMREX_GET_MACRO(__VA_ARGS__,\ +#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA(...) AMREX_GET_LAUNCH_MACRO(__VA_ARGS__,\ AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3, \ AMREX_WRONG_NUM_ARGS, \ AMREX_WRONG_NUM_ARGS, \ diff --git a/Src/Base/AMReX_GpuQualifiers.H b/Src/Base/AMReX_GpuQualifiers.H index 4fba23a849..3e10bec54d 100644 --- a/Src/Base/AMReX_GpuQualifiers.H +++ b/Src/Base/AMReX_GpuQualifiers.H @@ -64,4 +64,22 @@ # include #endif +#define AMREX_WRONG_NUM_ARGS(...) static_assert(false,"Wrong number of arguments to macro") + +#define AMREX_GET_DGV_MACRO(_1,_2,_3,NAME,...) NAME +#define AMREX_DEVICE_GLOBAL_VARIABLE(...) AMREX_GET_DGV_MACRO(__VA_ARGS__,\ + AMREX_DGVARR, AMREX_DGV,\ + AMREX_WRONG_NUM_ARGS)(__VA_ARGS__) + +#ifdef AMREX_USE_SYCL +# define AMREX_DGV(type,name) SYCL_EXTERNAL sycl::ext::oneapi::experimental::device_global name +# define AMREX_DGVARR(type,num,name) SYCL_EXTERNAL sycl::ext::oneapi::experimental::device_global name +#elif defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP) +# define AMREX_DGV(type,name) __device__ type name +# define AMREX_DGVARR(type,num,name) __device__ type name[num] +#else +# define AMREX_DGV(type,name) type name +# define AMREX_DGVARR(type,num,name) type name[num] +#endif + #endif diff --git a/Tests/CMakeLists.txt b/Tests/CMakeLists.txt index 99d72633b6..01f187b664 100644 --- a/Tests/CMakeLists.txt +++ b/Tests/CMakeLists.txt @@ -121,7 +121,8 @@ else() # # List of subdirectories to search for CMakeLists. # - set( AMREX_TESTS_SUBDIRS AsyncOut MultiBlock Reinit Amr CLZ Parser Parser2 CTOParFor RoundoffDomain) + set( AMREX_TESTS_SUBDIRS Amr AsyncOut CLZ CTOParFor DeviceGlobal + MultiBlock Parser Parser2 Reinit RoundoffDomain) if (AMReX_PARTICLES) list(APPEND AMREX_TESTS_SUBDIRS Particles) diff --git a/Tests/DeviceGlobal/CMakeLists.txt b/Tests/DeviceGlobal/CMakeLists.txt new file mode 100644 index 0000000000..990662d406 --- /dev/null +++ b/Tests/DeviceGlobal/CMakeLists.txt @@ -0,0 +1,15 @@ +if (( (AMReX_GPU_BACKEND STREQUAL "CUDA") OR + (AMReX_GPU_BACKEND STREQUAL "HIP" ) ) AND + (NOT AMReX_GPU_RDC)) + return() +endif () + +foreach(D IN LISTS AMReX_SPACEDIM) + set(_sources main.cpp global_vars.cpp init.cpp work.cpp) + set(_input_files) + + setup_test(${D} _sources _input_files) + + unset(_sources) + unset(_input_files) +endforeach() diff --git a/Tests/DeviceGlobal/GNUmakefile b/Tests/DeviceGlobal/GNUmakefile new file mode 100644 index 0000000000..fd5fbd8f2c --- /dev/null +++ b/Tests/DeviceGlobal/GNUmakefile @@ -0,0 +1,23 @@ +AMREX_HOME ?= ../../ + +DEBUG = FALSE + +DIM = 3 + +COMP = gcc + +USE_CUDA = TRUE +USE_HIP = FALSE +USE_SYCL = FALSE + +USE_MPI = FALSE +USE_OMP = FALSE + +BL_NO_FORT = TRUE + +include $(AMREX_HOME)/Tools/GNUMake/Make.defs + +include ./Make.package +include $(AMREX_HOME)/Src/Base/Make.package + +include $(AMREX_HOME)/Tools/GNUMake/Make.rules diff --git a/Tests/DeviceGlobal/Make.package b/Tests/DeviceGlobal/Make.package new file mode 100644 index 0000000000..8df45d1f81 --- /dev/null +++ b/Tests/DeviceGlobal/Make.package @@ -0,0 +1,2 @@ +CEXE_sources += main.cpp init.cpp work.cpp global_vars.cpp + diff --git a/Tests/DeviceGlobal/global_vars.H b/Tests/DeviceGlobal/global_vars.H new file mode 100644 index 0000000000..88ce1f0c4f --- /dev/null +++ b/Tests/DeviceGlobal/global_vars.H @@ -0,0 +1,7 @@ +#pragma once + +#include +#include + +extern AMREX_DEVICE_GLOBAL_VARIABLE(amrex::Long, dg_x); +extern AMREX_DEVICE_GLOBAL_VARIABLE(amrex::Long, 4, dg_y); diff --git a/Tests/DeviceGlobal/global_vars.cpp b/Tests/DeviceGlobal/global_vars.cpp new file mode 100644 index 0000000000..485f41f164 --- /dev/null +++ b/Tests/DeviceGlobal/global_vars.cpp @@ -0,0 +1,7 @@ + +#include "global_vars.H" + +// definitions of global variables + +AMREX_DEVICE_GLOBAL_VARIABLE(amrex::Long, dg_x); +AMREX_DEVICE_GLOBAL_VARIABLE(amrex::Long, 4, dg_y); diff --git a/Tests/DeviceGlobal/init.cpp b/Tests/DeviceGlobal/init.cpp new file mode 100644 index 0000000000..2850941f1e --- /dev/null +++ b/Tests/DeviceGlobal/init.cpp @@ -0,0 +1,27 @@ + +#include "global_vars.H" + +void init () +{ + amrex::ParallelFor(1, [=] AMREX_GPU_DEVICE (int) + { + dg_x = 1; + for (int n = 0; n < 4; ++n) { + dg_y[n] = 100 + n; + } + }); + + amrex::Gpu::streamSynchronize(); +} + +void init2 () +{ + amrex::Gpu::PinnedVector pv{2,200,201,202,203}; + amrex::Gpu::memcpy_from_host_to_device_global_async + (dg_x, pv.data(), sizeof(amrex::Long)); + amrex::Gpu::memcpy_from_host_to_device_global_async + (dg_y, pv.data()+1, sizeof(amrex::Long)); + amrex::Gpu::memcpy_from_host_to_device_global_async + (dg_y, pv.data()+2, sizeof(amrex::Long)*3, sizeof(amrex::Long)); + amrex::Gpu::streamSynchronize(); +} diff --git a/Tests/DeviceGlobal/main.cpp b/Tests/DeviceGlobal/main.cpp new file mode 100644 index 0000000000..b3b6778472 --- /dev/null +++ b/Tests/DeviceGlobal/main.cpp @@ -0,0 +1,20 @@ +#include +#include + +void init(); +void work(); +void init2(); +void work2(); + +int main (int argc, char* argv[]) +{ + amrex::Initialize(argc,argv); + { + init(); + work(); + + init2(); + work2(); + } + amrex::Finalize(); +} diff --git a/Tests/DeviceGlobal/work.cpp b/Tests/DeviceGlobal/work.cpp new file mode 100644 index 0000000000..8350dad066 --- /dev/null +++ b/Tests/DeviceGlobal/work.cpp @@ -0,0 +1,40 @@ + +#include "global_vars.H" + +void work () +{ + amrex::Gpu::PinnedVector pv; + pv.resize(5,0); + auto* p = pv.data(); + amrex::ParallelFor(1, [=] AMREX_GPU_DEVICE (int) + { + p[0] = dg_x; + for (int n = 0; n < 4; ++n) { + p[1+n] = dg_y[n]; + } + }); + amrex::Gpu::streamSynchronize(); + AMREX_ALWAYS_ASSERT(pv[0] == 1 && + pv[1] == 100 && + pv[2] == 101 && + pv[3] == 102 && + pv[4] == 103); +} + +void work2 () +{ + amrex::Gpu::PinnedVector pv; + pv.resize(5,0); + amrex::Gpu::memcpy_from_device_global_to_host_async + (pv.data(), dg_x, sizeof(amrex::Long)); + amrex::Gpu::memcpy_from_device_global_to_host_async + (pv.data()+1, dg_y, sizeof(amrex::Long)); + amrex::Gpu::memcpy_from_device_global_to_host_async + (pv.data()+2, dg_y, sizeof(amrex::Long)*3, sizeof(amrex::Long)); + amrex::Gpu::streamSynchronize(); + AMREX_ALWAYS_ASSERT(pv[0] == 2 && + pv[1] == 200 && + pv[2] == 201 && + pv[3] == 202 && + pv[4] == 203); +} diff --git a/Tools/GNUMake/comps/dpcpp.mak b/Tools/GNUMake/comps/dpcpp.mak index 3bcf5cb437..4e9a7e4652 100644 --- a/Tools/GNUMake/comps/dpcpp.mak +++ b/Tools/GNUMake/comps/dpcpp.mak @@ -45,6 +45,14 @@ ifeq ($(WARN_ALL),TRUE) warning_flags += -Wpedantic + # /tmp/icpx-2d34de0e47/global_vars-header-4390fb.h:25:36: error: zero size arrays are an extension [-Werror,-Wzero-length-array] + # 25 | const char* const kernel_names[] = { + # | ^ + # 1 error generated. + # + # Seen in oneapi 2024.2.0 after adding Test/DeviceGlobal + warning_flags += -Wno-zero-length-array + ifneq ($(WARN_SHADOW),FALSE) warning_flags += -Wshadow endif From b7083f0b20d629d2fceacb4768e4c6e7fc80921b Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 14 Aug 2024 00:16:24 -0500 Subject: [PATCH 2/9] Make sure ChopGrids does not violate refinement ratio. (#4078) When using ChopGrids on fine level BoxArrays, we must make sure the box sizes are multiples of refinement ratios. --- Src/AmrCore/AMReX_AmrMesh.cpp | 18 +++++++++++++++++- Src/Base/AMReX_BoxArray.H | 5 +++++ Src/Base/AMReX_BoxArray.cpp | 22 ++++++++++++++++++---- 3 files changed, 40 insertions(+), 5 deletions(-) diff --git a/Src/AmrCore/AMReX_AmrMesh.cpp b/Src/AmrCore/AMReX_AmrMesh.cpp index 9dd03d51e4..388ba55dcf 100644 --- a/Src/AmrCore/AMReX_AmrMesh.cpp +++ b/Src/AmrCore/AMReX_AmrMesh.cpp @@ -471,6 +471,9 @@ AmrMesh::ChopGrids (int lev, BoxArray& ba, int target_size) const IntVect chunk = max_grid_size[lev]; chunk.min(Geom(lev).Domain().length()); + // Note that ba already satisfies the max_grid_size requirement and it's + // coarsenable if it's a fine level BoxArray. + while (ba.size() < target_size) { IntVect chunk_prev = chunk; @@ -485,11 +488,24 @@ AmrMesh::ChopGrids (int lev, BoxArray& ba, int target_size) const int idim = chunk_dir[idx].second; if (refine_grid_layout_dims[idim]) { int new_chunk_size = chunk[idim] / 2; + int rr = (lev > 0) ? ref_ratio[lev-1][idim] : 1; + if (rr > 1) { + new_chunk_size = (new_chunk_size/rr) * rr; + } if (new_chunk_size != 0 && new_chunk_size%blocking_factor[lev][idim] == 0) { chunk[idim] = new_chunk_size; - ba.maxSize(chunk); + if (rr == 1) { + ba.maxSize(chunk); + } else { + IntVect bf(1); + bf[idim] = rr; + // Note that only idim-direction will be chopped by + // minmaxSize because the sizes in other directions + // are already smaller than chunk. + ba.minmaxSize(bf, chunk); + } break; } } diff --git a/Src/Base/AMReX_BoxArray.H b/Src/Base/AMReX_BoxArray.H index 19cee3cefb..b3b339c33b 100644 --- a/Src/Base/AMReX_BoxArray.H +++ b/Src/Base/AMReX_BoxArray.H @@ -615,6 +615,11 @@ public: BoxArray& maxSize (const IntVect& block_size); + //! Forces each Box in BoxArray to have sizes >= min_size and <= + //! max_size. It's the caller's responsibility to make sure both the + //! BoxArray and max_size are coarsenable by min_size. + BoxArray& minmaxSize (const IntVect& min_size, const IntVect& max_size); + //! Refine each Box in the BoxArray to the specified ratio. BoxArray& refine (int refinement_ratio); diff --git a/Src/Base/AMReX_BoxArray.cpp b/Src/Base/AMReX_BoxArray.cpp index ecffd06d8a..7f55cc5b99 100644 --- a/Src/Base/AMReX_BoxArray.cpp +++ b/Src/Base/AMReX_BoxArray.cpp @@ -555,12 +555,26 @@ BoxArray::maxSize (const IntVect& block_size) blst.maxSize(block_size); const int N = static_cast(blst.size()); if (size() != N) { // If size doesn't change, do nothing. - BoxList bak = (m_simplified_list) ? *m_simplified_list : BoxList(); + auto bak = m_simplified_list; define(std::move(blst)); - if (bak.isNotEmpty()) { - m_simplified_list = std::make_shared(std::move(bak)); - } + m_simplified_list = bak; + } + return *this; +} + +BoxArray& +BoxArray::minmaxSize (const IntVect& min_size, const IntVect& max_size) +{ + AMREX_ASSERT(this->coarsenable(min_size) && + (max_size/min_size)*min_size == max_size); + std::shared_ptr bak; + if (m_bat.is_simple() && crseRatio() == IntVect::TheUnitVector()) { + bak = m_simplified_list; } + this->coarsen(min_size); + this->maxSize(max_size/min_size); + this->refine(min_size); + m_simplified_list = bak; return *this; } From aa280e50fd043f9b47df3745907193a8ca17b568 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 14 Aug 2024 10:39:02 -0500 Subject: [PATCH 3/9] Fix warnings by Coverity Scan (#4080) Use move instead of copy. --- Src/Base/AMReX_BoxArray.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Src/Base/AMReX_BoxArray.cpp b/Src/Base/AMReX_BoxArray.cpp index 7f55cc5b99..9bca594352 100644 --- a/Src/Base/AMReX_BoxArray.cpp +++ b/Src/Base/AMReX_BoxArray.cpp @@ -555,9 +555,10 @@ BoxArray::maxSize (const IntVect& block_size) blst.maxSize(block_size); const int N = static_cast(blst.size()); if (size() != N) { // If size doesn't change, do nothing. - auto bak = m_simplified_list; + std::shared_ptr bak; + bak.swap(m_simplified_list); define(std::move(blst)); - m_simplified_list = bak; + m_simplified_list = std::move(bak); } return *this; } @@ -569,12 +570,12 @@ BoxArray::minmaxSize (const IntVect& min_size, const IntVect& max_size) (max_size/min_size)*min_size == max_size); std::shared_ptr bak; if (m_bat.is_simple() && crseRatio() == IntVect::TheUnitVector()) { - bak = m_simplified_list; + bak.swap(m_simplified_list); } this->coarsen(min_size); this->maxSize(max_size/min_size); this->refine(min_size); - m_simplified_list = bak; + m_simplified_list = std::move(bak); return *this; } From 6890ce0d34f72763a533bc6cfe3069494f21329f Mon Sep 17 00:00:00 2001 From: Miren Radia <32646026+mirenradia@users.noreply.github.com> Date: Wed, 14 Aug 2024 17:39:35 +0100 Subject: [PATCH 4/9] BLBackTrace: Check for `addr2line` in path first (#4079) ## Summary This fixes #4077 by first checking if `addr2line` or `eu-addr2line` is in the `PATH` first and then falls back to `/usr/bin/addr2line` or `/usr/bin/eu-addr2line` if not. ## Additional background See #4077 for why using the hardcoded path is a problem. --- Src/Base/AMReX_BLBackTrace.cpp | 44 +++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/Src/Base/AMReX_BLBackTrace.cpp b/Src/Base/AMReX_BLBackTrace.cpp index d511a19272..d065bd71c0 100644 --- a/Src/Base/AMReX_BLBackTrace.cpp +++ b/Src/Base/AMReX_BLBackTrace.cpp @@ -13,12 +13,13 @@ #include #endif +#include +#include +#include +#include +#include #include #include -#include -#include -#include -#include #if !(defined(_MSC_VER) && defined(__CUDACC__)) //MSVC can't pre-processor cfenv with `Zc:preprocessor` @@ -177,6 +178,18 @@ namespace { } return r; } + +#ifdef __linux__ + bool command_exists(std::string const &cmd) + { + // command -v is part of POSIX so should be available + std::string check_command = "command -v " + cmd + " > /dev/null 2>&1"; + int r = std::system(check_command.c_str()); + // return value of std::system is implementation defined and can be + // decoded using WEXITSTATUS but it should be 0 on success + return r == 0; + } +#endif } #endif @@ -209,19 +222,32 @@ BLBackTrace::print_backtrace_info (FILE* f) int have_addr2line = 0; std::string eu_cmd; { - have_eu_addr2line = file_exists("/usr/bin/eu-addr2line"); + if (command_exists("eu-addr2line")) { + have_eu_addr2line = 1; + eu_cmd = "eu-addr2line"; + } else { + std::string eu_fallback_path = "/usr/bin/eu-addr2line"; + have_eu_addr2line = file_exists(eu_fallback_path.c_str()); + eu_cmd = std::move(eu_fallback_path); + } if (have_eu_addr2line) { const pid_t pid = getpid(); // cmd = "/usr/bin/eu-addr2line -C -f -i --pretty-print -p " - eu_cmd = "/usr/bin/eu-addr2line -C -f -i -p " - + std::to_string(pid); + eu_cmd += " -C -f -i -p " + std::to_string(pid); } } std::string cmd; { - have_addr2line = file_exists("/usr/bin/addr2line"); + if (command_exists("addr2line")) { + have_addr2line = 1; + cmd = "addr2line"; + } else { + std::string fallback_path = "/usr/bin/addr2line"; + have_addr2line = file_exists(fallback_path.c_str()); + cmd = std::move(fallback_path); + } if (have_addr2line) { - cmd = "/usr/bin/addr2line -Cpfie " + amrex::system::exename; + cmd += " -Cpfie " + amrex::system::exename; } } From 8535675d4aef813693adb47c8703926805f2493a Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 14 Aug 2024 13:14:30 -0500 Subject: [PATCH 5/9] AMREX_ENUM and ParmParse support for enum class (#4069) This adds AMREX_ENUM that can be used to define enum class with reflection. The new feature allows us to support enum class in ParmParse. --- Docs/sphinx_documentation/source/Basics.rst | 50 ++++++++ Src/Base/AMReX_Enum.H | 81 ++++++++++++ Src/Base/AMReX_ParmParse.H | 133 ++++++++++++++++++++ Src/Base/AMReX_String.H | 30 +++++ Src/Base/AMReX_String.cpp | 54 ++++++++ Src/Base/AMReX_Utility.H | 12 +- Src/Base/AMReX_Utility.cpp | 39 ------ Src/Base/CMakeLists.txt | 3 + Src/Base/Make.package | 4 + Tests/CMakeLists.txt | 2 +- Tests/Enum/CMakeLists.txt | 9 ++ Tests/Enum/GNUmakefile | 24 ++++ Tests/Enum/Make.package | 1 + Tests/Enum/inputs | 10 ++ Tests/Enum/main.cpp | 99 +++++++++++++++ 15 files changed, 500 insertions(+), 51 deletions(-) create mode 100644 Src/Base/AMReX_Enum.H create mode 100644 Src/Base/AMReX_String.H create mode 100644 Src/Base/AMReX_String.cpp create mode 100644 Tests/Enum/CMakeLists.txt create mode 100644 Tests/Enum/GNUmakefile create mode 100644 Tests/Enum/Make.package create mode 100644 Tests/Enum/inputs create mode 100644 Tests/Enum/main.cpp diff --git a/Docs/sphinx_documentation/source/Basics.rst b/Docs/sphinx_documentation/source/Basics.rst index 5b9b137e30..97e68fd1e6 100644 --- a/Docs/sphinx_documentation/source/Basics.rst +++ b/Docs/sphinx_documentation/source/Basics.rst @@ -419,6 +419,56 @@ will become foo.a = 2 foo.b = 2 +Enum Class +---------- + +.. versionadded:: 24.09 + Enum class support in :cpp:`ParmParse`. + +AMReX provides a macro :cpp:`AMREX_ENUM` for defining :cpp:`enum class` that +supports reflection. For example, + +.. highlight:: c++ + +:: + + AMREX_ENUM(MyColor, red, green, blue); + + void f () + { + MyColor color = amrex::getEnum("red"); // MyColor::red + std::string name = amrex::getEnumNameString(MyColor::blue); // "blue" + std::vector names = amrex::getEnumNameStrings(); + // names = {"red", "green", "blue"}; + std::string class_name = amrex::getEnumClassName(); // "MyColor" + } + +This allows us to read :cpp:`ParmParse` parameters into enum class objects. + +.. highlight:: python + +:: + + color1 = red + color2 = BLue + +The following code shows how to query the enumerators. + +.. highlight:: c++ + +:: + + AMREX_ENUM(MyColor, none, red, green, blue); + + void f (MyColor& c1, MyColor& c2) + { + ParmParse pp; + pp.query("color1", c1); // c1 becomes MyColor::red + pp.query_enum_case_insensitive("color2", c2); // c2 becomes MyColor::blue + MyColor default_color; // MyColor::none + pp.query("color3", default_color); // Still MyColor::none + } + Overriding Parameters with Command-Line Arguments ------------------------------------------------- diff --git a/Src/Base/AMReX_Enum.H b/Src/Base/AMReX_Enum.H new file mode 100644 index 0000000000..09583f5b73 --- /dev/null +++ b/Src/Base/AMReX_Enum.H @@ -0,0 +1,81 @@ +#ifndef AMREX_ENUM_H_ +#define AMREX_ENUM_H_ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +template +using amrex_enum_traits = decltype(amrex_get_enum_traits(std::declval())); + +namespace amrex { + template , + std::enable_if_t = 0> + T getEnum (std::string_view const& s) + { + auto pos = ET::enum_names.find(s); + if (pos == std::string_view::npos) { + std::string error_msg("amrex::getEnum: Unknown enum: "); + error_msg.append(s).append(" in AMREX_ENUM(").append(ET::class_name) + .append(", ").append(ET::enum_names).append(")."); + throw std::runtime_error(error_msg); + } + auto count = std::count(ET::enum_names.begin(), + ET::enum_names.begin()+pos, ','); + return static_cast(count); + } + + template , + std::enable_if_t = 0> + std::string getEnumNameString (T const& v) + { + auto n = static_cast(v); + std::size_t pos = 0; + for (int i = 0; i < n; ++i) { + pos = ET::enum_names.find(',', pos); + if (pos == std::string::npos) { + std::string error_msg("amrex::getEnum: Unknown enum value: "); + error_msg.append(std::to_string(n)).append(" in AMREX_ENUM(") + .append(ET::class_name).append(", ").append(ET::enum_names) + .append(")."); + throw std::runtime_error(error_msg); + } + ++pos; + } + auto pos2 = ET::enum_names.find(',', pos); + return amrex::trim(std::string(ET::enum_names.substr(pos,pos2-pos))); + } + + template , + std::enable_if_t = 0> + std::vector getEnumNameStrings () + { + return amrex::split(std::string(ET::enum_names), ", "); + } + + template , + std::enable_if_t = 0> + std::string getEnumClassName () + { + return std::string(ET::class_name); + } +} + +#define AMREX_ENUM(CLASS, ...) \ + enum class CLASS : int { __VA_ARGS__ }; \ + struct CLASS##_EnumTraits { \ + using enum_class_t = CLASS; \ + static constexpr bool value = true; \ + static constexpr std::string_view class_name{#CLASS}; \ + static constexpr std::string_view enum_names{#__VA_ARGS__}; \ + }; \ + CLASS##_EnumTraits amrex_get_enum_traits(CLASS) + +#endif diff --git a/Src/Base/AMReX_ParmParse.H b/Src/Base/AMReX_ParmParse.H index c9e273643e..a844dcc1aa 100644 --- a/Src/Base/AMReX_ParmParse.H +++ b/Src/Base/AMReX_ParmParse.H @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -1118,6 +1119,138 @@ public: } } + /** + * \brief. Query enum value using given name. + * + * Here T is an enum class defined by AMREX_ENUM. The return value + * indicates if `name` is found. An exception is thrown, if the found + * string associated with the name cannot be converted to an enumerator + * (i.e., the string does not match any names in the definition of T). + */ + template , + std::enable_if_t = 0> + int query (const char* name, T& ref) + { + std::string s; + int exist = this->query(name, s); + if (exist) { + try { + ref = amrex::getEnum(s); + } catch (...) { + throw; + } + } + return exist; + } + + /** + * \brief. Get enum value using given name. + * + * Here T is an enum class defined by AMREX_ENUM. It's a runtime error, + * if `name` is not found. An exception is thrown, if the found string + * associated with the name cannot be converted to an enumerator (i.e., + * the string does not match any names in the definition of T). + */ + template , + std::enable_if_t = 0> + void get (const char* name, T& ref) + { + std::string s; + this->get(name, s); + try { + ref = amrex::getEnum(s); + } catch (...) { + throw; + } + } + + //! Query an array of enum values using given name. + template , + std::enable_if_t = 0> + int queryarr (const char* name, std::vector& ref) + { + std::vector s; + int exist = this->queryarr(name, s); + if (exist) { + ref.resize(s.size()); + for (std::size_t i = 0; i < s.size(); ++i) { + ref[i] = amrex::getEnum(s[i]); + } + } + return exist; + } + + //! Get an array of enum values using given name. + template , + std::enable_if_t = 0> + void getarr (const char* name, std::vector& ref) + { + std::vector s; + this->getarr(name, s); + ref.resize(s.size()); + for (std::size_t i = 0; i < s.size(); ++i) { + ref[i] = amrex::getEnum(s[i]); + } + } + + /** + * \brief. Query enum value using given name. + * + * Here T is an enum class defined by AMREX_ENUM. The return value + * indicates if `name` is found. An exception is thrown, if the found + * string associated with the name cannot be case-insensitively + * converted to an enumerator (i.e., the found string, not `name`, does + * not case-insensitively match any names in the definition of T). If + * there are multiple matches, the first one is used. + */ + template , + std::enable_if_t = 0> + int query_enum_case_insensitive (const char* name, T& ref) + { + std::string s; + int exist = this->query(name, s); + if (exist) { + s = amrex::toLower(s); + auto const& enum_names = amrex::getEnumNameStrings(); + auto found = std::find_if(enum_names.begin(), enum_names.end(), + [&] (std::string const& ename) { + return amrex::toLower(ename) == s; + }); + if (found != enum_names.end()) { + ref = static_cast(std::distance(enum_names.begin(), found)); + } else { + std::string msg("query_enum_case_insensitive(\""); + msg.append(name).append("\",").append(amrex::getEnumClassName()) + .append("&) failed."); + throw std::runtime_error(msg); + } + } + return exist; + } + + /** + * \brief. Get enum value using given name. + * + * Here T is an enum class defined by AMREX_ENUM. It's a runtime error, + * if `name` is not found. An exception is thrown, if the found string + * associated with the name cannot be case-insensitively converted to an + * enumerator (i.e., the found string, not `name`, does not + * case-insensitively match any names in the definition of T). If there + * are multiple matches, the first one is used. + */ + template , + std::enable_if_t = 0> + void get_enum_case_insensitive (const char* name, T& ref) + { + int exist = this->query_enum_case_insensitive(name, ref); + if (!exist) { + std::string msg("get_enum_case_insensitive(\""); + msg.append(name).append("\",").append(amrex::getEnumClassName()) + .append("&) failed."); + amrex::Abort(msg); + } + } + //! Remove given name from the table. int remove (const char* name); diff --git a/Src/Base/AMReX_String.H b/Src/Base/AMReX_String.H new file mode 100644 index 0000000000..147b7ab187 --- /dev/null +++ b/Src/Base/AMReX_String.H @@ -0,0 +1,30 @@ +#ifndef AMREX_STRING_H_ +#define AMREX_STRING_H_ +#include + +#include +#include + +namespace amrex { + + //! Converts all characters of the string into lower case based on std::locale + std::string toLower (std::string s); + + //! Converts all characters of the string into uppercase based on std::locale + std::string toUpper (std::string s); + + //! Trim leading and trailing characters in the optional `space` + //! argument. + std::string trim (std::string s, std::string const& space = " \t"); + + //! Returns rootNNNN where NNNN == num. + std::string Concatenate (const std::string& root, + int num, + int mindigits = 5); + + //! Split a string using given tokens in `sep`. + std::vector split (std::string const& s, + std::string const& sep = " \t"); +} + +#endif diff --git a/Src/Base/AMReX_String.cpp b/Src/Base/AMReX_String.cpp new file mode 100644 index 0000000000..24dbce4532 --- /dev/null +++ b/Src/Base/AMReX_String.cpp @@ -0,0 +1,54 @@ +#include +#include + +#include +#include +#include +#include + +namespace amrex { + +std::string toLower (std::string s) +{ + std::transform(s.begin(), s.end(), s.begin(), + [](unsigned char c) { return std::tolower(c); }); + return s; +} + +std::string toUpper (std::string s) +{ + std::transform(s.begin(), s.end(), s.begin(), + [](unsigned char c) { return std::toupper(c); }); + return s; +} + +std::string trim(std::string s, std::string const& space) +{ + const auto sbegin = s.find_first_not_of(space); + if (sbegin == std::string::npos) { return std::string{}; } + const auto send = s.find_last_not_of(space); + s = s.substr(sbegin, send-sbegin+1); + return s; +} + +std::string Concatenate (const std::string& root, int num, int mindigits) +{ + BL_ASSERT(mindigits >= 0); + std::stringstream result; + result << root << std::setfill('0') << std::setw(mindigits) << num; + return result.str(); +} + +std::vector split (std::string const& s, std::string const& sep) +{ + std::vector result; + std::size_t pos_begin, pos_end = 0; + while ((pos_begin = s.find_first_not_of(sep,pos_end)) != std::string::npos) { + pos_end = s.find_first_of(sep,pos_begin); + result.push_back(s.substr(pos_begin,pos_end-pos_begin)); + if (pos_end == std::string::npos) { break; } + } + return result; +} + +} diff --git a/Src/Base/AMReX_Utility.H b/Src/Base/AMReX_Utility.H index 016b8adb0e..6bec276dbf 100644 --- a/Src/Base/AMReX_Utility.H +++ b/Src/Base/AMReX_Utility.H @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -44,17 +45,6 @@ namespace amrex const std::vector& Tokenize (const std::string& instr, const std::string& separators); - //! Converts all characters of the string into lower or uppercase based on std::locale - std::string toLower (std::string s); - std::string toUpper (std::string s); - - //! Trim leading and trailing white space - std::string trim (std::string s, std::string const& space = " \t"); - - //! Returns rootNNNN where NNNN == num. - std::string Concatenate (const std::string& root, - int num, - int mindigits = 5); /** * \brief Creates the specified directories. path may be either a full pathname * or a relative pathname. It will create all the directories in the diff --git a/Src/Base/AMReX_Utility.cpp b/Src/Base/AMReX_Utility.cpp index 1c79dfba92..aa3d8a2d16 100644 --- a/Src/Base/AMReX_Utility.cpp +++ b/Src/Base/AMReX_Utility.cpp @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -113,44 +112,6 @@ amrex::Tokenize (const std::string& instr, return tokens; } -std::string -amrex::toLower (std::string s) -{ - std::transform(s.begin(), s.end(), s.begin(), - [](unsigned char c) { return std::tolower(c); }); - return s; -} - -std::string -amrex::toUpper (std::string s) -{ - std::transform(s.begin(), s.end(), s.begin(), - [](unsigned char c) { return std::toupper(c); }); - return s; -} - -std::string -amrex::trim(std::string s, std::string const& space) -{ - const auto sbegin = s.find_first_not_of(space); - if (sbegin == std::string::npos) { return std::string{}; } - const auto send = s.find_last_not_of(space); - s = s.substr(sbegin, send-sbegin+1); - return s; -} - -std::string -amrex::Concatenate (const std::string& root, - int num, - int mindigits) -{ - BL_ASSERT(mindigits >= 0); - std::stringstream result; - result << root << std::setfill('0') << std::setw(mindigits) << num; - return result.str(); -} - - bool amrex::UtilCreateDirectory (const std::string& path, mode_t mode, bool verbose) diff --git a/Src/Base/CMakeLists.txt b/Src/Base/CMakeLists.txt index cebd1f9bce..0436ad032e 100644 --- a/Src/Base/CMakeLists.txt +++ b/Src/Base/CMakeLists.txt @@ -12,6 +12,7 @@ foreach(D IN LISTS AMReX_SPACEDIM) AMReX_Array.H AMReX_BlockMutex.H AMReX_BlockMutex.cpp + AMReX_Enum.H AMReX_GpuComplex.H AMReX_Vector.H AMReX_TableData.H @@ -30,6 +31,8 @@ foreach(D IN LISTS AMReX_SPACEDIM) AMReX_parmparse_fi.cpp AMReX_ParmParse.H AMReX_Functional.H + AMReX_String.H + AMReX_String.cpp AMReX_Utility.H AMReX_Utility.cpp AMReX_FileSystem.H diff --git a/Src/Base/Make.package b/Src/Base/Make.package index dfbfb4f03a..b009ebf7d6 100644 --- a/Src/Base/Make.package +++ b/Src/Base/Make.package @@ -2,6 +2,7 @@ AMREX_BASE=EXE C$(AMREX_BASE)_headers += AMReX_ccse-mpi.H AMReX_Algorithm.H AMReX_Any.H AMReX_Array.H +C$(AMREX_BASE)_headers += AMReX_Enum.H C$(AMREX_BASE)_headers += AMReX_Vector.H AMReX_TableData.H AMReX_Tuple.H AMReX_Math.H C$(AMREX_BASE)_headers += AMReX_TypeList.H @@ -22,6 +23,9 @@ C$(AMREX_BASE)_sources += AMReX_PODVector.cpp C$(AMREX_BASE)_headers += AMReX_BlockMutex.H C$(AMREX_BASE)_sources += AMReX_BlockMutex.cpp +C$(AMREX_BASE)_headers += AMReX_String.H +C$(AMREX_BASE)_sources += AMReX_String.cpp + C$(AMREX_BASE)_sources += AMReX_ParmParse.cpp AMReX_parmparse_fi.cpp AMReX_Utility.cpp C$(AMREX_BASE)_headers += AMReX_ParmParse.H AMReX_Utility.H AMReX_BLassert.H AMReX_ArrayLim.H C$(AMREX_BASE)_headers += AMReX_Functional.H AMReX_Reduce.H AMReX_Scan.H AMReX_Partition.H diff --git a/Tests/CMakeLists.txt b/Tests/CMakeLists.txt index 01f187b664..3f801cde2b 100644 --- a/Tests/CMakeLists.txt +++ b/Tests/CMakeLists.txt @@ -121,7 +121,7 @@ else() # # List of subdirectories to search for CMakeLists. # - set( AMREX_TESTS_SUBDIRS Amr AsyncOut CLZ CTOParFor DeviceGlobal + set( AMREX_TESTS_SUBDIRS Amr AsyncOut CLZ CTOParFor DeviceGlobal Enum MultiBlock Parser Parser2 Reinit RoundoffDomain) if (AMReX_PARTICLES) diff --git a/Tests/Enum/CMakeLists.txt b/Tests/Enum/CMakeLists.txt new file mode 100644 index 0000000000..9c0e7f321d --- /dev/null +++ b/Tests/Enum/CMakeLists.txt @@ -0,0 +1,9 @@ +foreach(D IN LISTS AMReX_SPACEDIM) + set(_sources main.cpp) + set(_input_files inputs) + + setup_test(${D} _sources _input_files) + + unset(_sources) + unset(_input_files) +endforeach() diff --git a/Tests/Enum/GNUmakefile b/Tests/Enum/GNUmakefile new file mode 100644 index 0000000000..d0d895ff52 --- /dev/null +++ b/Tests/Enum/GNUmakefile @@ -0,0 +1,24 @@ +AMREX_HOME := ../.. + +DEBUG = FALSE + +DIM = 3 + +COMP = gcc + +USE_MPI = FALSE +USE_OMP = FALSE +USE_CUDA = FALSE +USE_HIP = FALSE +USE_SYCL = FALSE + +BL_NO_FORT = TRUE + +TINY_PROFILE = FALSE + +include $(AMREX_HOME)/Tools/GNUMake/Make.defs + +include ./Make.package +include $(AMREX_HOME)/Src/Base/Make.package + +include $(AMREX_HOME)/Tools/GNUMake/Make.rules diff --git a/Tests/Enum/Make.package b/Tests/Enum/Make.package new file mode 100644 index 0000000000..6b4b865e8f --- /dev/null +++ b/Tests/Enum/Make.package @@ -0,0 +1 @@ +CEXE_sources += main.cpp diff --git a/Tests/Enum/inputs b/Tests/Enum/inputs new file mode 100644 index 0000000000..0972043658 --- /dev/null +++ b/Tests/Enum/inputs @@ -0,0 +1,10 @@ + +color1 = red +color2 = green +color3 = blue +color4 = greenxxx +color5 = Blue + +colors = cyan yellow orange + + diff --git a/Tests/Enum/main.cpp b/Tests/Enum/main.cpp new file mode 100644 index 0000000000..6fb25b01a5 --- /dev/null +++ b/Tests/Enum/main.cpp @@ -0,0 +1,99 @@ +#include +#include +#include + +using namespace amrex; + +AMREX_ENUM(MyColor, red, green, blue ); + +namespace my_namespace { + AMREX_ENUM(MyColor, orange, yellow,cyan ); +} + +int main (int argc, char* argv[]) +{ + amrex::Initialize(argc, argv); + { + auto const& names = amrex::getEnumNameStrings(); + auto const& names2 = amrex::getEnumNameStrings(); + amrex::Print() << "colors:"; + for (auto const& name : names) { + amrex::Print() << " " << name; + } + amrex::Print() << "\n"; + amrex::Print() << "colors:"; + for (auto const& name : names2) { + amrex::Print() << " " << name; + } + amrex::Print() << "\n"; + + ParmParse pp; + { + auto color = static_cast(999); + pp.query("color1", color); + amrex::Print() << "color = " << amrex::getEnumNameString(color) << '\n'; + AMREX_ALWAYS_ASSERT(color == MyColor::red); + } + { + auto color = static_cast(999); + pp.get("color2", color); + amrex::Print() << "color = " << amrex::getEnumNameString(color) << '\n'; + AMREX_ALWAYS_ASSERT(color == MyColor::green); + } + { + auto color = static_cast(999); + pp.get("color3", color); + amrex::Print() << "color = " << amrex::getEnumNameString(color) << '\n'; + AMREX_ALWAYS_ASSERT(color == MyColor::blue); + } + { + auto color = static_cast(999); + try { + pp.query("color4", color); + } catch (std::runtime_error const& e) { + amrex::Print() << "As expected, " << e.what() << '\n'; + } + AMREX_ALWAYS_ASSERT(color == static_cast(999)); + try { + pp.get_enum_case_insensitive("color4", color); + } catch (std::runtime_error const& e) { + amrex::Print() << "As expected, " << e.what() << '\n'; + } + AMREX_ALWAYS_ASSERT(color == static_cast(999)); + } + { + auto color = static_cast(999); + try { + pp.query("color5", color); + } catch (std::runtime_error const& e) { + amrex::Print() << "As expected, " << e.what() << '\n'; + } + AMREX_ALWAYS_ASSERT(color == static_cast(999)); + pp.query_enum_case_insensitive("color5", color); + amrex::Print() << "color = " << amrex::getEnumNameString(color) << '\n'; + AMREX_ALWAYS_ASSERT(color == MyColor::blue); + } + { + std::vector color; + pp.getarr("colors", color); + AMREX_ALWAYS_ASSERT(color.size() == 3 && + color[0] == my_namespace::MyColor::cyan && + color[1] == my_namespace::MyColor::yellow && + color[2] == my_namespace::MyColor::orange); + std::vector color2; + pp.queryarr("colors", color2); + AMREX_ALWAYS_ASSERT(color.size() == 3 && + color == color2 && + color[0] == my_namespace::MyColor::cyan && + color[1] == my_namespace::MyColor::yellow && + color[2] == my_namespace::MyColor::orange); + amrex::Print() << "colors:"; + for (auto const& c : color) { + amrex::Print() << " " << amrex::getEnumNameString(c); + } + amrex::Print() << "\n"; + } + } + + amrex::Finalize(); +} From b82296fbf8ff30a47eeb143575663080a284feda Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Thu, 15 Aug 2024 15:39:27 -0500 Subject: [PATCH 6/9] MLEBNodeFDLaplacian: Make it work with AMREX_USE_EB but with no EB (#4083) This will allow the use of the linear solver when AMReX is built with EB support at compile time but no EB is built at runtime. X-Ref: https://github.com/ECP-WarpX/WarpX/pull/4865 --- .../MLMG/AMReX_MLEBNodeFDLaplacian.cpp | 82 +++++++++++-------- 1 file changed, 48 insertions(+), 34 deletions(-) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp index 09bb6d87e1..eaaffd97cf 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp @@ -5,6 +5,7 @@ #include #ifdef AMREX_USE_EB +#include #include #endif @@ -151,10 +152,14 @@ MLEBNodeFDLaplacian::define (const Vector& a_geom, std::unique_ptr > MLEBNodeFDLaplacian::makeFactory (int amrlev, int mglev) const { - return makeEBFabFactory(m_geom[amrlev][mglev], - m_grids[amrlev][mglev], - m_dmap[amrlev][mglev], - {1,1,1}, EBSupport::full); + if (EB2::TopIndexSpaceIfPresent()) { + return makeEBFabFactory(m_geom[amrlev][mglev], + m_grids[amrlev][mglev], + m_dmap[amrlev][mglev], + {1,1,1}, EBSupport::full); + } else { + return MLNodeLinOp::makeFactory(amrlev, mglev); + } } #endif @@ -264,17 +269,19 @@ MLEBNodeFDLaplacian::prepareForSolve () for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) { for (int mglev = 0; mglev < m_num_mg_levels[amrlev]; ++mglev) { const auto *factory = dynamic_cast(m_factory[amrlev][mglev].get()); - auto const& levset_mf = factory->getLevelSet(); - auto const& levset_ar = levset_mf.const_arrays(); - auto& dmask_mf = *m_dirichlet_mask[amrlev][mglev]; - auto const& dmask_ar = dmask_mf.arrays(); - amrex::ParallelFor(dmask_mf, - [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept - { - if (levset_ar[box_no](i,j,k) >= Real(0.0)) { - dmask_ar[box_no](i,j,k) = -1; - } - }); + if (factory) { + auto const& levset_mf = factory->getLevelSet(); + auto const& levset_ar = levset_mf.const_arrays(); + auto& dmask_mf = *m_dirichlet_mask[amrlev][mglev]; + auto const& dmask_ar = dmask_mf.arrays(); + amrex::ParallelFor(dmask_mf, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + { + if (levset_ar[box_no](i,j,k) >= Real(0.0)) { + dmask_ar[box_no](i,j,k) = -1; + } + }); + } } } #endif @@ -359,8 +366,10 @@ MLEBNodeFDLaplacian::prepareForSolve () void MLEBNodeFDLaplacian::scaleRHS (int amrlev, MultiFab& rhs) const { - auto const& dmask = *m_dirichlet_mask[amrlev][0]; const auto *factory = dynamic_cast(m_factory[amrlev][0].get()); + if (!factory) { return; } + + auto const& dmask = *m_dirichlet_mask[amrlev][0]; auto const& edgecent = factory->getEdgeCent(); #ifdef AMREX_USE_OMP @@ -407,9 +416,10 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa #ifdef AMREX_USE_EB const auto phieb = (m_in_solution_mode) ? m_s_phi_eb : Real(0.0); const auto *factory = dynamic_cast(m_factory[amrlev][mglev].get()); - auto const& edgecent = factory->getEdgeCent(); - auto const& levset_mf = factory->getLevelSet(); - auto const& volfrac = factory->getVolFrac(); + Array edgecent {AMREX_D_DECL(nullptr,nullptr,nullptr)}; + if (factory) { + edgecent = factory->getEdgeCent(); + } #endif #ifdef AMREX_USE_OMP @@ -422,12 +432,12 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa Array4 const& yarr = out.array(mfi); Array4 const& dmarr = dmask.const_array(mfi); #ifdef AMREX_USE_EB - bool cutfab = edgecent[0]->ok(mfi); - if (cutfab) { + bool cutfab = edgecent[0] && edgecent[0]->ok(mfi); + if (cutfab && factory) { // clang-tidy is not that smart AMREX_D_TERM(Array4 const& ecx = edgecent[0]->const_array(mfi);, Array4 const& ecy = edgecent[1]->const_array(mfi);, Array4 const& ecz = edgecent[2]->const_array(mfi)); - auto const& levset = levset_mf.const_array(mfi); + auto const& levset = factory->getVolFrac().const_array(mfi); if (phieb == std::numeric_limits::lowest()) { auto const& phiebarr = m_phi_eb[amrlev].const_array(mfi); #if (AMREX_SPACEDIM == 2) @@ -441,7 +451,7 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa #endif if (m_has_sigma_mf) { auto const& sigarr = m_sigma_mf[amrlev][mglev]->const_array(mfi); - auto const& vfrc = volfrac.const_array(mfi); + auto const& vfrc = factory->getVolFrac().const_array(mfi); AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, { mlebndfdlap_sig_adotx_eb(i,j,k,yarr,xarr,levset,dmarr,AMREX_D_DECL(ecx,ecy,ecz), @@ -466,7 +476,7 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa #endif if (m_has_sigma_mf) { auto const& sigarr = m_sigma_mf[amrlev][mglev]->const_array(mfi); - auto const& vfrc = volfrac.const_array(mfi); + auto const& vfrc = factory->getVolFrac().const_array(mfi); AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, { mlebndfdlap_sig_adotx_eb(i,j,k,yarr,xarr,levset,dmarr,AMREX_D_DECL(ecx,ecy,ecz), @@ -533,9 +543,10 @@ MLEBNodeFDLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiF #ifdef AMREX_USE_EB const auto *factory = dynamic_cast(m_factory[amrlev][mglev].get()); - auto const& edgecent = factory->getEdgeCent(); - auto const& levset_mf = factory->getLevelSet(); - auto const& volfrac = factory->getVolFrac(); + Array edgecent {AMREX_D_DECL(nullptr,nullptr,nullptr)}; + if (factory) { + edgecent = factory->getEdgeCent(); + } #endif #ifdef AMREX_USE_OMP @@ -548,12 +559,12 @@ MLEBNodeFDLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiF Array4 const& rhsarr = rhs.const_array(mfi); Array4 const& dmskarr = dmask.const_array(mfi); #ifdef AMREX_USE_EB - bool cutfab = edgecent[0]->ok(mfi); - if (cutfab) { + bool cutfab = edgecent[0] && edgecent[0]->ok(mfi); + if (cutfab && factory) { // clang-tidy is not that smart AMREX_D_TERM(Array4 const& ecx = edgecent[0]->const_array(mfi);, Array4 const& ecy = edgecent[1]->const_array(mfi);, Array4 const& ecz = edgecent[2]->const_array(mfi)); - auto const& levset = levset_mf.const_array(mfi); + auto const& levset = factory->getLevelSet().const_array(mfi); #if (AMREX_SPACEDIM == 2) if (m_rz) { AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, @@ -565,7 +576,7 @@ MLEBNodeFDLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiF #endif if (m_has_sigma_mf) { auto const& sigarr = m_sigma_mf[amrlev][mglev]->const_array(mfi); - auto const& vfrc = volfrac.const_array(mfi); + auto const& vfrc = factory->getVolFrac().const_array(mfi); AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, { mlebndfdlap_sig_gsrb_eb(i,j,k,solarr,rhsarr,levset,dmskarr,AMREX_D_DECL(ecx,ecy,ecz), @@ -641,8 +652,10 @@ MLEBNodeFDLaplacian::compGrad (int amrlev, const Array auto const& dmask = *m_dirichlet_mask[amrlev][mglev]; const auto phieb = m_s_phi_eb; const auto *factory = dynamic_cast(m_factory[amrlev][mglev].get()); - AMREX_ASSERT(factory); - auto const& edgecent = factory->getEdgeCent(); + Array edgecent {AMREX_D_DECL(nullptr,nullptr,nullptr)}; + if (factory) { + edgecent = factory->getEdgeCent(); + } #endif #ifdef AMREX_USE_OMP @@ -659,7 +672,7 @@ MLEBNodeFDLaplacian::compGrad (int amrlev, const Array Array4 const& gpz = grad[2]->array(mfi);) #ifdef AMREX_USE_EB Array4 const& dmarr = dmask.const_array(mfi); - bool cutfab = edgecent[0]->ok(mfi); + bool cutfab = edgecent[0] && edgecent[0]->ok(mfi); AMREX_D_TERM(Array4 const& ecx = cutfab ? edgecent[0]->const_array(mfi) : Array4{};, Array4 const& ecy @@ -741,6 +754,7 @@ MLEBNodeFDLaplacian::postSolve (Vector& sol) const for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) { const auto phieb = m_s_phi_eb; const auto *factory = dynamic_cast(m_factory[amrlev][0].get()); + if (!factory) { return; } auto const& levset_mf = factory->getLevelSet(); auto const& levset_ar = levset_mf.const_arrays(); MultiFab& mf = sol[amrlev]; From d9919c92db09ed9e927f9e4c7fb2ddcc161ab5ed Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Thu, 15 Aug 2024 18:04:52 -0700 Subject: [PATCH 7/9] MLEBNodeFDLaplacian: Regression (#4085) ## Summary Fix copy-paste regression from #4083 ## Additional background https://github.com/AMReX-Codes/amrex/pull/4083#discussion_r1719149149 --- Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp index eaaffd97cf..af4a6a6d74 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp @@ -437,7 +437,7 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa AMREX_D_TERM(Array4 const& ecx = edgecent[0]->const_array(mfi);, Array4 const& ecy = edgecent[1]->const_array(mfi);, Array4 const& ecz = edgecent[2]->const_array(mfi)); - auto const& levset = factory->getVolFrac().const_array(mfi); + auto const& levset = factory->getLevelSet().const_array(mfi); if (phieb == std::numeric_limits::lowest()) { auto const& phiebarr = m_phi_eb[amrlev].const_array(mfi); #if (AMREX_SPACEDIM == 2) From c061a14011d4d58755f330d15a20f7243863a90d Mon Sep 17 00:00:00 2001 From: Roelof Groenewald <40245517+roelof-groenewald@users.noreply.github.com> Date: Fri, 16 Aug 2024 16:48:18 -0700 Subject: [PATCH 8/9] Add overload for `getParticleCell` that returns local cell index (#4081) ## Summary In WarpX we have a few routines that requires the local cell index of particles (i.e. the cell number in the current box). The AMReX function `getParticleCell` currently only has logic to return the global cell index (relative to the domain low end). This PR adds a new overload for that function which does not shift the cell index based on the global box index. ## Additional background See https://github.com/ECP-WarpX/WarpX/pull/5118 for WarpX context. ## Checklist The proposed changes: - [ ] fix a bug or incorrect behavior in AMReX - [x] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Src/Particle/AMReX_ParticleUtil.H | 41 +++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H index 6b31db7e99..b09f1d3583 100644 --- a/Src/Particle/AMReX_ParticleUtil.H +++ b/Src/Particle/AMReX_ParticleUtil.H @@ -356,17 +356,54 @@ struct GetParticleBin } }; +/** + * \brief Returns the cell index for a given particle using the + * provided lower bounds and cell sizes. + * + * This version indexes cells starting from 0 at the lower left corner of + * the provided lower bounds, i.e., it returns a local index. + * + * \tparam P a type of AMReX particle. + * + * \param p the particle for which the cell index is calculated + * \param plo the low end of the domain + * \param dxi cell sizes in each dimension + */ template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE IntVect getParticleCell (P const& p, amrex::GpuArray const& plo, - amrex::GpuArray const& dxi, - const Box& domain) noexcept + amrex::GpuArray const& dxi) noexcept { IntVect iv( AMREX_D_DECL(int(amrex::Math::floor((p.pos(0)-plo[0])*dxi[0])), int(amrex::Math::floor((p.pos(1)-plo[1])*dxi[1])), int(amrex::Math::floor((p.pos(2)-plo[2])*dxi[2])))); + return iv; +} + +/** + * \brief Returns the cell index for a given particle using the + * provided lower bounds, cell sizes and global domain offset. + * + * This version indexes cells starting from 0 at the lower left corner of + * the simulation geometry, i.e., it returns a global index. + * + * \tparam P a type of AMReX particle. + * + * \param p the particle for which the cell index is calculated + * \param plo the low end of the domain + * \param dxi cell sizes in each dimension + * \param domain AMReX box in which the given particle resides + */ +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +IntVect getParticleCell (P const& p, + amrex::GpuArray const& plo, + amrex::GpuArray const& dxi, + const Box& domain) noexcept +{ + IntVect iv = getParticleCell(p, plo, dxi); iv += domain.smallEnd(); return iv; } From 8222a13de06d3fe063a1f635623348905c2475f8 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sat, 17 Aug 2024 21:11:00 -0500 Subject: [PATCH 9/9] Multi-level Hypre: Fix bugs (#4089) The stencil was wrong at the corner where the coarse/fine boundary meets the domain boundary. --- Src/Extern/HYPRE/AMReX_HypreMLABecLap.H | 3 + Src/Extern/HYPRE/AMReX_HypreMLABecLap.cpp | 57 +++-- Src/Extern/HYPRE/AMReX_HypreMLABecLap_2D_K.H | 34 ++- Src/Extern/HYPRE/AMReX_HypreMLABecLap_3D_K.H | 224 +++++++++++-------- Src/Extern/HYPRE/AMReX_HypreMLABecLap_K.H | 8 +- Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H | 33 ++- Src/LinearSolvers/MLMG/AMReX_MLLinOp.H | 2 + Src/LinearSolvers/MLMG/AMReX_MLMG.H | 12 + 8 files changed, 250 insertions(+), 123 deletions(-) diff --git a/Src/Extern/HYPRE/AMReX_HypreMLABecLap.H b/Src/Extern/HYPRE/AMReX_HypreMLABecLap.H index 04147207d3..6f687766b8 100644 --- a/Src/Extern/HYPRE/AMReX_HypreMLABecLap.H +++ b/Src/Extern/HYPRE/AMReX_HypreMLABecLap.H @@ -39,6 +39,7 @@ public: void setVerbose (int v) { m_verbose = v; } void setMaxIter (int v) { m_maxiter = v; } + void setIsSingular (bool v) { m_is_singular = v; } void setup (Real a_ascalar, Real a_bscalar, Vector const& a_acoefs, @@ -65,6 +66,7 @@ private: int m_verbose = 0; int m_maxiter = 200; + bool m_is_singular = false; Vector m_geom; Vector m_grids; @@ -87,6 +89,7 @@ private: Vector> m_bndry; Vector> m_bndry_rhs; Vector m_fine_masks; + Vector m_crse_masks; // For coarse cells at coarse/fine interface. The vector is for AMR // levels. diff --git a/Src/Extern/HYPRE/AMReX_HypreMLABecLap.cpp b/Src/Extern/HYPRE/AMReX_HypreMLABecLap.cpp index d7621c6bd0..b726f307bb 100644 --- a/Src/Extern/HYPRE/AMReX_HypreMLABecLap.cpp +++ b/Src/Extern/HYPRE/AMReX_HypreMLABecLap.cpp @@ -57,11 +57,16 @@ HypreMLABecLap::HypreMLABecLap (Vector a_geom, } m_fine_masks.resize(m_nlevels-1); + m_crse_masks.resize(m_nlevels-1); for (int ilev = 0; ilev < m_nlevels-1; ++ilev) { m_fine_masks[ilev] = amrex::makeFineMask(m_grids[ilev], m_dmap[ilev], IntVect(1), m_grids[ilev+1], m_ref_ratio[ilev], m_geom[ilev].periodicity(), 0, 1); + m_crse_masks[ilev].define(m_grids[ilev], m_dmap[ilev], 1, 1); + m_crse_masks[ilev].BuildMask(m_geom[ilev].Domain(), + m_geom[ilev].periodicity(), + 1, 0, 0, 1); } m_c2f_offset_from.resize(m_nlevels-1); @@ -588,12 +593,19 @@ void HypreMLABecLap::setup (Real a_ascalar, Real a_bscalar, const auto boxlo = amrex::lbound(vbx); const auto boxhi = amrex::ubound(vbx); // Set up stencil part of the matrix + auto fixed_pt = IntVect::TheMaxVector(); + if (m_is_singular && m_nlevels-1 == ilev) { + auto const& box0 = m_grids.back()[0]; + fixed_pt = box0.smallEnd() + 1; + // This cell does not have any non-stencil entries. So it's + // a good point for fixing singularity. + } amrex::fill(matfab, [=] AMREX_GPU_HOST_DEVICE (GpuArray& sten, int i, int j, int k) { hypmlabeclap_mat(sten, i, j, k, boxlo, boxhi, sa, afab, sb, dx, bfabs, - bctype, bcl, bcmsk, bcval, bcrhs, ilev); + bctype, bcl, bcmsk, bcval, bcrhs, ilev, fixed_pt); }); bool need_sync = true; @@ -636,6 +648,7 @@ void HypreMLABecLap::setup (Real a_ascalar, Real a_bscalar, auto const& c2f_offset_to_a = m_c2f_offset_to[ilev].const_array(mfi); auto const& mat_a = matfab.array(); auto const& fine_mask = m_fine_masks[ilev].const_array(mfi); + auto const& crse_mask = m_crse_masks[ilev].const_array(mfi); AMREX_D_TERM(auto offset_bx_a = m_offset_cf_bcoefs[ilev][0].isDefined() ? m_offset_cf_bcoefs[ilev][0].const_array(mfi) : Array4{};, @@ -664,7 +677,7 @@ void HypreMLABecLap::setup (Real a_ascalar, Real a_bscalar, c2f_offset_to_a, dx, sb, AMREX_D_DECL(offset_bx_a,offset_by_a,offset_bz_a), AMREX_D_DECL(p_bx, p_by, p_bz), - fine_mask,rr); + fine_mask,rr, crse_mask); }); if (c2f_total_from > 0) { #ifdef AMREX_USE_GPU @@ -838,8 +851,8 @@ void HypreMLABecLap::setup (Real a_ascalar, Real a_bscalar, HYPRE_SStructSSAMGSetNumPostRelax(m_ss_solver, 4); HYPRE_SStructSSAMGSetNumCoarseRelax(m_ss_solver, 4); - HYPRE_SStructSSAMGSetLogging(m_ss_solver, m_verbose); - HYPRE_SStructSSAMGSetPrintLevel(m_ss_solver, m_verbose); + HYPRE_SStructSSAMGSetLogging(m_ss_solver, 1); + // HYPRE_SStructSSAMGSetPrintLevel(m_ss_solver, 1); /* 0: no, 1: setup, 2: solve, 3:both // HYPRE_SStructSSAMGSetup(m_ss_solver, A, b, x); @@ -854,15 +867,15 @@ void HypreMLABecLap::setup (Real a_ascalar, Real a_bscalar, HYPRE_BoomerAMGCreate(&m_solver); HYPRE_BoomerAMGSetOldDefault(m_solver); // Falgout coarsening with modified classical interpolation - HYPRE_BoomerAMGSetStrongThreshold(m_solver, (AMREX_SPACEDIM == 3) ? 0.6 : 0.25); // default is 0.25 + HYPRE_BoomerAMGSetStrongThreshold(m_solver, (AMREX_SPACEDIM == 3) ? 0.4 : 0.25); // default is 0.25 HYPRE_BoomerAMGSetRelaxOrder(m_solver, 1); /* 0: default, natural order, 1: C/F relaxation order */ HYPRE_BoomerAMGSetNumSweeps(m_solver, 2); /* Sweeps on fine levels */ // HYPRE_BoomerAMGSetFCycle(m_solver, 1); // default is 0 // HYPRE_BoomerAMGSetCoarsenType(m_solver, 6); // HYPRE_BoomerAMGSetRelaxType(m_solver, 6); /* G-S/Jacobi hybrid relaxation */ - HYPRE_BoomerAMGSetLogging(m_solver, m_verbose); - HYPRE_BoomerAMGSetPrintLevel(m_solver, m_verbose); + HYPRE_BoomerAMGSetLogging(m_solver, 1); + // HYPRE_BoomerAMGSetPrintLevel(m_solver, 1); /* 0: no, 1: setup, 2: solve, 3:both HYPRE_ParCSRMatrix par_A; HYPRE_SStructMatrixGetObject(m_ss_A, (void**) &par_A); @@ -956,6 +969,9 @@ void HypreMLABecLap::solve (Vector const& a_sol, Vector 0"); } + HYPRE_Int num_iterations; + Real final_res; + #ifdef AMREX_FEATURE_HYPRE_SSAMG if (m_hypre_solver_id == HypreSolverID::SSAMG) { @@ -965,15 +981,13 @@ void HypreMLABecLap::solve (Vector const& a_sol, Vector const& a_sol, Vector reltol) { + amrex::Abort("Hypre failed to converge after "+std::to_string(num_iterations)+ + " iterations. Final relative residual is "+std::to_string(final_res)); + } } // Get solution @@ -1044,8 +1061,6 @@ void HypreMLABecLap::solve (Vector const& a_sol, Vector= 0; --ilev) { amrex::average_down(*a_sol[ilev+1], *a_sol[ilev], 0, ncomp, m_ref_ratio[ilev]); } - - // xxxxx abort if convergence is not reached. } #ifdef AMREX_USE_GPU diff --git a/Src/Extern/HYPRE/AMReX_HypreMLABecLap_2D_K.H b/Src/Extern/HYPRE/AMReX_HypreMLABecLap_2D_K.H index 57a37f19bf..7d083e7d98 100644 --- a/Src/Extern/HYPRE/AMReX_HypreMLABecLap_2D_K.H +++ b/Src/Extern/HYPRE/AMReX_HypreMLABecLap_2D_K.H @@ -109,7 +109,7 @@ void hypmlabeclap_c2f (int i, int j, int k, Array4 const& offset_by, Real const* bx, Real const* by, Array4 const& fine_mask, - IntVect const& rr) + IntVect const& rr, Array4 const& crse_mask) { if (fine_mask(i,j,k)) { // Let's set off-diagonal elements to zero @@ -159,9 +159,13 @@ void hypmlabeclap_c2f (int i, int j, int k, Real xInt = Real(-0.5) + (irx+Real(0.5))/Real(rr[0]); Real xc[3] = {Real(-1.0), Real(0.0), Real(1.0)}; Real ct[3] = {Real(0.0), Real(0.0), Real(0.0)}; - if (fine_mask(i-1,j,k)) { + if ( fine_mask(i-1,j,k) || + !crse_mask(i-1,j,k)) + { poly_interp_coeff<2>(xInt, &(xc[1]), &(ct[1])); - } else if (fine_mask(i+1,j,k)) { + } else if ( fine_mask(i+1,j,k) || + !crse_mask(i+1,j,k)) + { poly_interp_coeff<2>(xInt, xc, ct); } else { poly_interp_coeff<3>(xInt, xc, ct); @@ -202,9 +206,13 @@ void hypmlabeclap_c2f (int i, int j, int k, Real yInt = Real(-0.5) + (iry+Real(0.5))/Real(rr[1]); Real yc[3] = {Real(-1.0), Real(0.0), Real(1.0)}; Real ct[3] = {Real(0.0), Real(0.0), Real(0.0)}; - if (fine_mask(i,j-1,k)) { + if ( fine_mask(i,j-1,k) || + !crse_mask(i,j-1,k)) + { poly_interp_coeff<2>(yInt, &(yc[1]), &(ct[1])); - } else if (fine_mask(i,j+1,k)) { + } else if ( fine_mask(i,j+1,k) || + !crse_mask(i,j+1,k)) + { poly_interp_coeff<2>(yInt, yc, ct); } else { poly_interp_coeff<3>(yInt, yc, ct); @@ -244,9 +252,13 @@ void hypmlabeclap_c2f (int i, int j, int k, Real yInt = Real(-0.5) + (iry+Real(0.5))/Real(rr[1]); Real yc[3] = {Real(-1.0), Real(0.0), Real(1.0)}; Real ct[3] = {Real(0.0), Real(0.0), Real(0.0)}; - if (fine_mask(i,j-1,k)) { + if ( fine_mask(i,j-1,k) || + !crse_mask(i,j-1,k)) + { poly_interp_coeff<2>(yInt, &(yc[1]), &(ct[1])); - } else if (fine_mask(i,j+1,k)) { + } else if ( fine_mask(i,j+1,k) || + !crse_mask(i,j+1,k)) + { poly_interp_coeff<2>(yInt, yc, ct); } else { poly_interp_coeff<3>(yInt, yc, ct); @@ -286,9 +298,13 @@ void hypmlabeclap_c2f (int i, int j, int k, Real xInt = Real(-0.5) + (irx+Real(0.5))/Real(rr[0]); Real xc[3] = {Real(-1.0), Real(0.0), Real(1.0)}; Real ct[3] = {Real(0.0), Real(0.0), Real(0.0)}; - if (fine_mask(i-1,j,k)) { + if ( fine_mask(i-1,j,k) || + !crse_mask(i-1,j,k)) + { poly_interp_coeff<2>(xInt, &(xc[1]), &(ct[1])); - } else if (fine_mask(i+1,j,k)) { + } else if ( fine_mask(i+1,j,k) || + !crse_mask(i+1,j,k)) + { poly_interp_coeff<2>(xInt, xc, ct); } else { poly_interp_coeff<3>(xInt, xc, ct); diff --git a/Src/Extern/HYPRE/AMReX_HypreMLABecLap_3D_K.H b/Src/Extern/HYPRE/AMReX_HypreMLABecLap_3D_K.H index 8e6e1a39b1..431650236f 100644 --- a/Src/Extern/HYPRE/AMReX_HypreMLABecLap_3D_K.H +++ b/Src/Extern/HYPRE/AMReX_HypreMLABecLap_3D_K.H @@ -166,7 +166,7 @@ void hypmlabeclap_c2f (int i, int j, int k, Array4 const& offset_bz, Real const* bx, Real const* by, Real const* bz, Array4 const& fine_mask, - IntVect const& rr) + IntVect const& rr, Array4 const& crse_mask) { if (fine_mask(i,j,k)) { // Let's set off-diagonal elements to zero @@ -191,7 +191,11 @@ void hypmlabeclap_c2f (int i, int j, int k, (! fine_mask(i,j-1,k-1)) && (! fine_mask(i,j+1,k-1)) && (! fine_mask(i,j-1,k+1)) && - (! fine_mask(i,j+1,k+1))) + (! fine_mask(i,j+1,k+1)) && + ( crse_mask(i,j-1,k-1)) && + ( crse_mask(i,j+1,k-1)) && + ( crse_mask(i,j-1,k+1)) && + ( crse_mask(i,j+1,k+1))) { corner[0] = true; } @@ -199,7 +203,11 @@ void hypmlabeclap_c2f (int i, int j, int k, (! fine_mask(i-1,j,k-1)) && (! fine_mask(i+1,j,k-1)) && (! fine_mask(i-1,j,k+1)) && - (! fine_mask(i+1,j,k+1))) + (! fine_mask(i+1,j,k+1)) && + ( crse_mask(i-1,j,k-1)) && + ( crse_mask(i+1,j,k-1)) && + ( crse_mask(i-1,j,k+1)) && + ( crse_mask(i+1,j,k+1))) { corner[1] = true; } @@ -207,7 +215,11 @@ void hypmlabeclap_c2f (int i, int j, int k, (! fine_mask(i-1,j-1,k)) && (! fine_mask(i+1,j-1,k)) && (! fine_mask(i-1,j+1,k)) && - (! fine_mask(i+1,j+1,k))) + (! fine_mask(i+1,j+1,k)) && + ( crse_mask(i-1,j-1,k)) && + ( crse_mask(i+1,j-1,k)) && + ( crse_mask(i-1,j+1,k)) && + ( crse_mask(i+1,j+1,k))) { corner[2] = true; } @@ -253,28 +265,34 @@ void hypmlabeclap_c2f (int i, int j, int k, Real fac0 = fac*cc[0]; Real s0 = Real(1.0); - if (!fine_mask(i-1,j,k) && !fine_mask(i+1,j,k)) { - s0 -= x*x; - stencil(i,j,k)[1] += fac0*Real(0.5)*x*(x-Real(1.0)); - stencil(i,j,k)[2] += fac0*Real(0.5)*x*(x+Real(1.0)); - } else if (!fine_mask(i-1,j,k)) { + if ( fine_mask(i-1,j,k) || + !crse_mask(i-1,j,k)) + { + s0 += Real(-0.5)*x; + stencil(i,j,k)[2] += fac0*Real(0.5)*x; + } else if ( fine_mask(i+1,j,k) || + !crse_mask(i+1,j,k)) { s0 += Real(0.5)*x; stencil(i,j,k)[1] += fac0*Real(-0.5)*x; } else { - s0 += Real(-0.5)*x; - stencil(i,j,k)[2] += fac0*Real(0.5)*x; + s0 -= x*x; + stencil(i,j,k)[1] += fac0*Real(0.5)*x*(x-Real(1.0)); + stencil(i,j,k)[2] += fac0*Real(0.5)*x*(x+Real(1.0)); } - if (!fine_mask(i,j-1,k) && !fine_mask(i,j+1,k)) { - s0 -= y*y; - stencil(i,j,k)[3] += fac0*Real(0.5)*y*(y-Real(1.0)); - stencil(i,j,k)[4] += fac0*Real(0.5)*y*(y+Real(1.0)); - } else if (!fine_mask(i,j-1,k)) { + if ( fine_mask(i,j-1,k) || + !crse_mask(i,j-1,k)) + { + s0 += Real(-0.5)*y; + stencil(i,j,k)[4] += fac0*Real(0.5)*y; + } else if ( fine_mask(i,j+1,k) || + !crse_mask(i,j+1,k)) { s0 += Real(0.5)*y; stencil(i,j,k)[3] += fac0*Real(-0.5)*y; } else { - s0 += Real(-0.5)*y; - stencil(i,j,k)[4] += fac0*Real(0.5)*y; + s0 -= y*y; + stencil(i,j,k)[3] += fac0*Real(0.5)*y*(y-Real(1.0)); + stencil(i,j,k)[4] += fac0*Real(0.5)*y*(y+Real(1.0)); } stencil(i,j,k)[0] += fac0*s0; @@ -322,28 +340,34 @@ void hypmlabeclap_c2f (int i, int j, int k, Real fac0 = fac*cc[0]; Real s0 = Real(1.0); - if (!fine_mask(i-1,j,k) && !fine_mask(i+1,j,k)) { - s0 -= x*x; - stencil(i,j,k)[1] += fac0*Real(0.5)*x*(x-Real(1.0)); - stencil(i,j,k)[2] += fac0*Real(0.5)*x*(x+Real(1.0)); - } else if (!fine_mask(i-1,j,k)) { + if ( fine_mask(i-1,j,k) || + !crse_mask(i-1,j,k)) + { + s0 += Real(-0.5)*x; + stencil(i,j,k)[2] += fac0*Real(0.5)*x; + } else if ( fine_mask(i+1,j,k) || + !crse_mask(i+1,j,k)) { s0 += Real(0.5)*x; stencil(i,j,k)[1] += fac0*Real(-0.5)*x; } else { - s0 += Real(-0.5)*x; - stencil(i,j,k)[2] += fac0*Real(0.5)*x; + s0 -= x*x; + stencil(i,j,k)[1] += fac0*Real(0.5)*x*(x-Real(1.0)); + stencil(i,j,k)[2] += fac0*Real(0.5)*x*(x+Real(1.0)); } - if (!fine_mask(i,j,k-1) && !fine_mask(i,j,k+1)) { - s0 -= z*z; - stencil(i,j,k)[5] += fac0*Real(0.5)*z*(z-Real(1.0)); - stencil(i,j,k)[6] += fac0*Real(0.5)*z*(z+Real(1.0)); - } else if (!fine_mask(i,j,k-1)) { + if ( fine_mask(i,j,k-1) || + !crse_mask(i,j,k-1)) + { + s0 += Real(-0.5)*z; + stencil(i,j,k)[6] += fac0*Real(0.5)*z; + } else if ( fine_mask(i,j,k+1) || + !crse_mask(i,j,k+1)) { s0 += Real(0.5)*z; stencil(i,j,k)[5] += fac0*Real(-0.5)*z; } else { - s0 += Real(-0.5)*z; - stencil(i,j,k)[6] += fac0*Real(0.5)*z; + s0 -= z*z; + stencil(i,j,k)[5] += fac0*Real(0.5)*z*(z-Real(1.0)); + stencil(i,j,k)[6] += fac0*Real(0.5)*z*(z+Real(1.0)); } stencil(i,j,k)[0] += fac0*s0; @@ -393,28 +417,34 @@ void hypmlabeclap_c2f (int i, int j, int k, Real fac0 = fac*cc[0]; Real s0 = Real(1.0); - if (!fine_mask(i,j-1,k) && !fine_mask(i,j+1,k)) { - s0 -= y*y; - stencil(i,j,k)[3] += fac0*Real(0.5)*y*(y-Real(1.0)); - stencil(i,j,k)[4] += fac0*Real(0.5)*y*(y+Real(1.0)); - } else if (!fine_mask(i,j-1,k)) { + if ( fine_mask(i,j-1,k) || + !crse_mask(i,j-1,k)) + { + s0 += Real(-0.5)*y; + stencil(i,j,k)[4] += fac0*Real(0.5)*y; + } else if ( fine_mask(i,j+1,k) || + !crse_mask(i,j+1,k)) { s0 += Real(0.5)*y; stencil(i,j,k)[3] += fac0*Real(-0.5)*y; } else { - s0 += Real(-0.5)*y; - stencil(i,j,k)[4] += fac0*Real(0.5)*y; + s0 -= y*y; + stencil(i,j,k)[3] += fac0*Real(0.5)*y*(y-Real(1.0)); + stencil(i,j,k)[4] += fac0*Real(0.5)*y*(y+Real(1.0)); } - if (!fine_mask(i,j,k-1) && !fine_mask(i,j,k+1)) { - s0 -= z*z; - stencil(i,j,k)[5] += fac0*Real(0.5)*z*(z-Real(1.0)); - stencil(i,j,k)[6] += fac0*Real(0.5)*z*(z+Real(1.0)); - } else if (!fine_mask(i,j,k-1)) { + if ( fine_mask(i,j,k-1) || + !crse_mask(i,j,k-1)) + { + s0 += Real(-0.5)*z; + stencil(i,j,k)[6] += fac0*Real(0.5)*z; + } else if ( fine_mask(i,j,k+1) || + !crse_mask(i,j,k+1)) { s0 += Real(0.5)*z; stencil(i,j,k)[5] += fac0*Real(-0.5)*z; } else { - s0 += Real(-0.5)*z; - stencil(i,j,k)[6] += fac0*Real(0.5)*z; + s0 -= z*z; + stencil(i,j,k)[5] += fac0*Real(0.5)*z*(z-Real(1.0)); + stencil(i,j,k)[6] += fac0*Real(0.5)*z*(z+Real(1.0)); } stencil(i,j,k)[0] += fac0*s0; @@ -463,28 +493,34 @@ void hypmlabeclap_c2f (int i, int j, int k, Real fac0 = fac*cc[0]; Real s0 = Real(1.0); - if (!fine_mask(i,j-1,k) && !fine_mask(i,j+1,k)) { - s0 -= y*y; - stencil(i,j,k)[3] += fac0*Real(0.5)*y*(y-Real(1.0)); - stencil(i,j,k)[4] += fac0*Real(0.5)*y*(y+Real(1.0)); - } else if (!fine_mask(i,j-1,k)) { + if ( fine_mask(i,j-1,k) || + !crse_mask(i,j-1,k)) + { + s0 += Real(-0.5)*y; + stencil(i,j,k)[4] += fac0*Real(0.5)*y; + } else if ( fine_mask(i,j+1,k) || + !crse_mask(i,j+1,k)) { s0 += Real(0.5)*y; stencil(i,j,k)[3] += fac0*Real(-0.5)*y; } else { - s0 += Real(-0.5)*y; - stencil(i,j,k)[4] += fac0*Real(0.5)*y; + s0 -= y*y; + stencil(i,j,k)[3] += fac0*Real(0.5)*y*(y-Real(1.0)); + stencil(i,j,k)[4] += fac0*Real(0.5)*y*(y+Real(1.0)); } - if (!fine_mask(i,j,k-1) && !fine_mask(i,j,k+1)) { - s0 -= z*z; - stencil(i,j,k)[5] += fac0*Real(0.5)*z*(z-Real(1.0)); - stencil(i,j,k)[6] += fac0*Real(0.5)*z*(z+Real(1.0)); - } else if (!fine_mask(i,j,k-1)) { + if ( fine_mask(i,j,k-1) || + !crse_mask(i,j,k-1)) + { + s0 += Real(-0.5)*z; + stencil(i,j,k)[6] += fac0*Real(0.5)*z; + } else if ( fine_mask(i,j,k+1) || + !crse_mask(i,j,k+1)) { s0 += Real(0.5)*z; stencil(i,j,k)[5] += fac0*Real(-0.5)*z; } else { - s0 += Real(-0.5)*z; - stencil(i,j,k)[6] += fac0*Real(0.5)*z; + s0 -= z*z; + stencil(i,j,k)[5] += fac0*Real(0.5)*z*(z-Real(1.0)); + stencil(i,j,k)[6] += fac0*Real(0.5)*z*(z+Real(1.0)); } stencil(i,j,k)[0] += fac0*s0; @@ -534,28 +570,34 @@ void hypmlabeclap_c2f (int i, int j, int k, Real fac0 = fac*cc[0]; Real s0 = Real(1.0); - if (!fine_mask(i-1,j,k) && !fine_mask(i+1,j,k)) { - s0 -= x*x; - stencil(i,j,k)[1] += fac0*Real(0.5)*x*(x-Real(1.0)); - stencil(i,j,k)[2] += fac0*Real(0.5)*x*(x+Real(1.0)); - } else if (!fine_mask(i-1,j,k)) { + if ( fine_mask(i-1,j,k) || + !crse_mask(i-1,j,k)) + { + s0 += Real(-0.5)*x; + stencil(i,j,k)[2] += fac0*Real(0.5)*x; + } else if ( fine_mask(i+1,j,k) || + !crse_mask(i+1,j,k)) { s0 += Real(0.5)*x; stencil(i,j,k)[1] += fac0*Real(-0.5)*x; } else { - s0 += Real(-0.5)*x; - stencil(i,j,k)[2] += fac0*Real(0.5)*x; + s0 -= x*x; + stencil(i,j,k)[1] += fac0*Real(0.5)*x*(x-Real(1.0)); + stencil(i,j,k)[2] += fac0*Real(0.5)*x*(x+Real(1.0)); } - if (!fine_mask(i,j,k-1) && !fine_mask(i,j,k+1)) { - s0 -= z*z; - stencil(i,j,k)[5] += fac0*Real(0.5)*z*(z-Real(1.0)); - stencil(i,j,k)[6] += fac0*Real(0.5)*z*(z+Real(1.0)); - } else if (!fine_mask(i,j,k-1)) { + if ( fine_mask(i,j,k-1) || + !crse_mask(i,j,k-1)) + { + s0 += Real(-0.5)*z; + stencil(i,j,k)[6] += fac0*Real(0.5)*z; + } else if ( fine_mask(i,j,k+1) || + !crse_mask(i,j,k+1)) { s0 += Real(0.5)*z; stencil(i,j,k)[5] += fac0*Real(-0.5)*z; } else { - s0 += Real(-0.5)*z; - stencil(i,j,k)[6] += fac0*Real(0.5)*z; + s0 -= z*z; + stencil(i,j,k)[5] += fac0*Real(0.5)*z*(z-Real(1.0)); + stencil(i,j,k)[6] += fac0*Real(0.5)*z*(z+Real(1.0)); } stencil(i,j,k)[0] += fac0*s0; @@ -605,28 +647,34 @@ void hypmlabeclap_c2f (int i, int j, int k, Real fac0 = fac*cc[0]; Real s0 = Real(1.0); - if (!fine_mask(i-1,j,k) && !fine_mask(i+1,j,k)) { - s0 -= x*x; - stencil(i,j,k)[1] += fac0*Real(0.5)*x*(x-Real(1.0)); - stencil(i,j,k)[2] += fac0*Real(0.5)*x*(x+Real(1.0)); - } else if (!fine_mask(i-1,j,k)) { + if ( fine_mask(i-1,j,k) || + !crse_mask(i-1,j,k)) + { + s0 += Real(-0.5)*x; + stencil(i,j,k)[2] += fac0*Real(0.5)*x; + } else if ( fine_mask(i+1,j,k) || + !crse_mask(i+1,j,k)) { s0 += Real(0.5)*x; stencil(i,j,k)[1] += fac0*Real(-0.5)*x; } else { - s0 += Real(-0.5)*x; - stencil(i,j,k)[2] += fac0*Real(0.5)*x; + s0 -= x*x; + stencil(i,j,k)[1] += fac0*Real(0.5)*x*(x-Real(1.0)); + stencil(i,j,k)[2] += fac0*Real(0.5)*x*(x+Real(1.0)); } - if (!fine_mask(i,j-1,k) && !fine_mask(i,j+1,k)) { - s0 -= y*y; - stencil(i,j,k)[3] += fac0*Real(0.5)*y*(y-Real(1.0)); - stencil(i,j,k)[4] += fac0*Real(0.5)*y*(y+Real(1.0)); - } else if (!fine_mask(i,j-1,k)) { + if ( fine_mask(i,j-1,k) || + !crse_mask(i,j-1,k)) + { + s0 += Real(-0.5)*y; + stencil(i,j,k)[4] += fac0*Real(0.5)*y; + } else if ( fine_mask(i,j+1,k) || + !crse_mask(i,j+1,k)) { s0 += Real(0.5)*y; stencil(i,j,k)[3] += fac0*Real(-0.5)*y; } else { - s0 += Real(-0.5)*y; - stencil(i,j,k)[4] += fac0*Real(0.5)*y; + s0 -= y*y; + stencil(i,j,k)[3] += fac0*Real(0.5)*y*(y-Real(1.0)); + stencil(i,j,k)[4] += fac0*Real(0.5)*y*(y+Real(1.0)); } stencil(i,j,k)[0] += fac0*s0; diff --git a/Src/Extern/HYPRE/AMReX_HypreMLABecLap_K.H b/Src/Extern/HYPRE/AMReX_HypreMLABecLap_K.H index ea38bf5037..129a6a989a 100644 --- a/Src/Extern/HYPRE/AMReX_HypreMLABecLap_K.H +++ b/Src/Extern/HYPRE/AMReX_HypreMLABecLap_K.H @@ -22,7 +22,7 @@ void hypmlabeclap_mat (GpuArray& sten, int i, int j, in GpuArray, AMREX_SPACEDIM*2> const& bcmsk, GpuArray, AMREX_SPACEDIM*2> const& bcval, GpuArray, AMREX_SPACEDIM*2> const& bcrhs, - int level) + int level, IntVect const& fixed_pt) { Real bxm = b[0] ? b[0](i ,j ,k ) : Real(1.0); Real bxp = b[0] ? b[0](i+1,j ,k ) : Real(1.0); @@ -223,6 +223,12 @@ void hypmlabeclap_mat (GpuArray& sten, int i, int j, in } #endif + + if (fixed_pt == IntVect(AMREX_D_DECL(i,j,k))) { + for (int n = 1; n < 2*AMREX_SPACEDIM+1; ++n) { + sten[n] = Real(0.0); + } + } } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H index 536d4c82b0..783d17bbac 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H @@ -86,6 +86,8 @@ public: void solutionResidual (int amrlev, MF& resid, MF& x, const MF& b, const MF* crse_bcdata=nullptr) override; + void prepareForFluxes (int amrlev, const MF* crse_bcdata = nullptr) override; + void correctionResidual (int amrlev, int mglev, MF& resid, MF& x, const MF& b, BCMode bc_mode, const MF* crse_bcdata=nullptr) final; @@ -133,6 +135,10 @@ public: Vector > m_robin_bcval; +#ifdef AMREX_USE_HYPRE + void setInterpBndryHalfWidth (int w) { m_interpbndry_halfwidth = w; } +#endif + protected: bool m_has_metric_term = false; @@ -202,6 +208,8 @@ private: void computeVolInv () const; mutable Vector > m_volinv; // used by solvability fix + + int m_interpbndry_halfwidth = 2; }; template @@ -472,7 +480,9 @@ MLCellLinOpT::defineBC () bc_data.setVal(0.0); m_bndry_cor[amrlev]->setBndryValues(*m_crse_cor_br[amrlev], 0, bc_data, 0, 0, ncomp, - IntVect(this->m_amr_ref_ratio[amrlev-1])); + IntVect(this->m_amr_ref_ratio[amrlev-1]), + InterpBndryDataT::IBD_max_order_DEF, + m_interpbndry_halfwidth); Vector > bclohi (ncomp,Array{{AMREX_D_DECL(BCType::Dirichlet, @@ -544,7 +554,9 @@ MLCellLinOpT::setLevelBC (int amrlev, const MF* a_levelbcdata, const MF* rob m_crse_sol_br[amrlev]->setVal(RT(0.0)); } m_bndry_sol[amrlev]->setBndryValues(*m_crse_sol_br[amrlev], 0, - bcdata, 0, 0, ncomp, br_ref_ratio); + bcdata, 0, 0, ncomp, br_ref_ratio, + InterpBndryDataT::IBD_max_order_DEF, + m_interpbndry_halfwidth); br_ref_ratio = this->m_coarse_data_crse_ratio; } else @@ -639,7 +651,9 @@ MLCellLinOpT::updateSolBC (int amrlev, const MF& crse_bcdata) const m_crse_sol_br[amrlev]->copyFrom(crse_bcdata, 0, 0, 0, ncomp, this->m_geom[amrlev-1][0].periodicity()); m_bndry_sol[amrlev]->updateBndryValues(*m_crse_sol_br[amrlev], 0, 0, ncomp, - IntVect(this->m_amr_ref_ratio[amrlev-1])); + IntVect(this->m_amr_ref_ratio[amrlev-1]), + InterpBndryDataT::IBD_max_order_DEF, + m_interpbndry_halfwidth); } template @@ -652,7 +666,9 @@ MLCellLinOpT::updateCorBC (int amrlev, const MF& crse_bcdata) const m_crse_cor_br[amrlev]->copyFrom(crse_bcdata, 0, 0, 0, ncomp, this->m_geom[amrlev-1][0].periodicity()); m_bndry_cor[amrlev]->updateBndryValues(*m_crse_cor_br[amrlev], 0, 0, ncomp, - IntVect(this->m_amr_ref_ratio[amrlev-1])); + IntVect(this->m_amr_ref_ratio[amrlev-1]), + InterpBndryDataT::IBD_max_order_DEF, + m_interpbndry_halfwidth); } template @@ -1210,6 +1226,15 @@ MLCellLinOpT::solutionResidual (int amrlev, MF& resid, MF& x, const MF& b, MF::Xpay(resid, RT(-1.0), b, 0, 0, ncomp, IntVect(0)); } +template +void +MLCellLinOpT::prepareForFluxes (int amrlev, const MF* crse_bcdata) +{ + if (crse_bcdata != nullptr) { + updateSolBC(amrlev, *crse_bcdata); + } +} + template void MLCellLinOpT::correctionResidual (int amrlev, int mglev, MF& resid, MF& x, const MF& b, diff --git a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H index 03da0874e7..3cc623b761 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H @@ -376,6 +376,8 @@ public: virtual void solutionResidual (int amrlev, MF& resid, MF& x, const MF& b, const MF* crse_bcdata=nullptr) = 0; + virtual void prepareForFluxes (int /*amrlev*/, const MF* /*crse_bcdata*/ = nullptr) {} + /** * \brief Compute residual for the residual-correction form, resid = b - L(x) * diff --git a/Src/LinearSolvers/MLMG/AMReX_MLMG.H b/Src/LinearSolvers/MLMG/AMReX_MLMG.H index 5078383d5c..78b2ffdd3d 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLMG.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLMG.H @@ -164,6 +164,8 @@ public: void setHypreStrongThreshold (Real t) noexcept {hypre_strong_threshold = t;} #endif + void prepareForFluxes (Vector const& a_sol); + template void prepareForSolve (Vector const& a_sol, Vector const& a_rhs); @@ -538,6 +540,16 @@ MLMGT::solve (const Vector& a_sol, const Vector& a_rhs, return composite_norminf; } +template +void +MLMGT::prepareForFluxes (Vector const& a_sol) +{ + for (int alev = finest_amr_lev; alev >= 0; --alev) { + const MF* crse_bcdata = (alev > 0) ? a_sol[alev-1] : nullptr; + linop.prepareForFluxes(alev, crse_bcdata); + } +} + template template void