From 97512176d8e5ac0adc566f0e224fcf3127327c7f Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 1 Mar 2024 12:37:19 -0800 Subject: [PATCH 1/4] Curl Curl solver: 4-color Gauss-Seidel smoother (#3778) This implements the 4-color Gauss-Seidel smoother of Li et. al. 2020. "An Efficient Preconditioner for 3-D Finite Difference Modeling of the Electromagnetic Diffusion Process in the Frequency Domain", IEEE Transactions on Geoscience and Remote Sensing, 58, 500-509. --- Src/Base/AMReX_GpuMemory.H | 2 +- Src/Base/AMReX_LUSolver.H | 146 +++++ Src/Base/CMakeLists.txt | 2 + Src/Base/Make.package | 3 + Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.H | 25 +- Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.cpp | 472 ++++++++++----- Src/LinearSolvers/MLMG/AMReX_MLCurlCurl_K.H | 635 +++++++++++++++----- Tests/LinearSolvers/CurlCurl/MyTest.H | 2 +- Tests/LinearSolvers/CurlCurl/MyTest.cpp | 3 + Tests/LinearSolvers/CurlCurl/inputs | 2 +- 10 files changed, 959 insertions(+), 333 deletions(-) create mode 100644 Src/Base/AMReX_LUSolver.H diff --git a/Src/Base/AMReX_GpuMemory.H b/Src/Base/AMReX_GpuMemory.H index 1ffee38701..7f2b937fdf 100644 --- a/Src/Base/AMReX_GpuMemory.H +++ b/Src/Base/AMReX_GpuMemory.H @@ -104,7 +104,7 @@ private: #else - DeviceScalar (T init_val) : d(init_val) {} + DeviceScalar (T const& init_val) : d(init_val) {} DeviceScalar () = default; ~DeviceScalar () = default; diff --git a/Src/Base/AMReX_LUSolver.H b/Src/Base/AMReX_LUSolver.H new file mode 100644 index 0000000000..bd69822ea5 --- /dev/null +++ b/Src/Base/AMReX_LUSolver.H @@ -0,0 +1,146 @@ +#ifndef AMREX_LU_SOLVER_H_ +#define AMREX_LU_SOLVER_H_ +#include + +#include +#include +#include +#include + +namespace amrex { + +// https://en.wikipedia.org/wiki/LU_decomposition + +template +class LUSolver +{ +public: + + LUSolver () = default; + + LUSolver (Array2D const& a_mat); + + void define (Array2D const& a_mat); + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + void operator() (T* AMREX_RESTRICT x, T const* AMREX_RESTRICT b) const + { + for (int i = 0; i < N; ++i) { + x[i] = b[m_piv(i)]; + for (int k = 0; k < i; ++k) { + x[i] -= m_mat(i,k) * x[k]; + } + } + + for (int i = N-1; i >= 0; --i) { + for (int k = i+1; k < N; ++k) { + x[i] -= m_mat(i,k) * x[k]; + } + x[i] *= m_mat(i,i); + } + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE + Array2D invert () const + { + Array2D IA; + for (int j = 0; j < N; ++j) { + for (int i = 0; i < N; ++i) { + IA(i,j) = (m_piv(i) == j) ? T(1.0) : T(0.0); + for (int k = 0; k < i; ++k) { + IA(i,j) -= m_mat(i,k) * IA(k,j); + } + } + for (int i = N-1; i >= 0; --i) { + for (int k = i+1; k < N; ++k) { + IA(i,j) -= m_mat(i,k) * IA(k,j); + } + IA(i,j) *= m_mat(i,i); + } + } + return IA; + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE + T determinant () const + { + T det = m_mat(0,0); + for (int i = 1; i < N; ++i) { + det *= m_mat(i,i); + } + det = T(1.0) / det; + return (m_npivs % 2 == 0) ? det : -det; + } + +private: + + void define_innard (); + + Array2D m_mat; + Array1D m_piv; + int m_npivs = 0; +}; + +template +LUSolver::LUSolver (Array2D const& a_mat) + : m_mat(a_mat) +{ + define_innard(); +} + +template +void LUSolver::define (Array2D const& a_mat) +{ + m_mat = a_mat; + define_innard(); +} + +template +void LUSolver::define_innard () +{ + static_assert(N > 1); + static_assert(std::is_floating_point_v); + + for (int i = 0; i < N; ++i) { m_piv(i) = i; } + m_npivs = 0; + + for (int i = 0; i < N; ++i) { + T maxA = 0; + int imax = i; + + for (int k = i; k < N; ++k) { + auto const absA = std::abs(m_mat(k,i)); + if (absA > maxA) { + maxA = absA; + imax = k; + } + } + + if (maxA < std::numeric_limits::min()) { + amrex::Abort("LUSolver: matrix is degenerate"); + } + + if (imax != i) { + std::swap(m_piv(i), m_piv(imax)); + for (int j = 0; j < N; ++j) { + std::swap(m_mat(i,j), m_mat(imax,j)); + } + ++m_npivs; + } + + for (int j = i+1; j < N; ++j) { + m_mat(j,i) /= m_mat(i,i); + for (int k = i+1; k < N; ++k) { + m_mat(j,k) -= m_mat(j,i) * m_mat(i,k); + } + } + } + + for (int i = 0; i < N; ++i) { + m_mat(i,i) = T(1) / m_mat(i,i); + } +} + +} + +#endif diff --git a/Src/Base/CMakeLists.txt b/Src/Base/CMakeLists.txt index 740ff03c77..cebd1f9bce 100644 --- a/Src/Base/CMakeLists.txt +++ b/Src/Base/CMakeLists.txt @@ -270,6 +270,8 @@ foreach(D IN LISTS AMReX_SPACEDIM) Parser/amrex_iparser.tab.cpp Parser/amrex_iparser.tab.nolint.H Parser/amrex_iparser.tab.h + # Dense linear algebra solver using LU decomposition + AMReX_LUSolver.H # AMReX Hydro ----------------------------------------------------- AMReX_Slopes_K.H # Forward declaration ----------------------------------------------------- diff --git a/Src/Base/Make.package b/Src/Base/Make.package index f45256b347..dfbfb4f03a 100644 --- a/Src/Base/Make.package +++ b/Src/Base/Make.package @@ -326,5 +326,8 @@ CEXE_sources += AMReX_IParser_Exe.cpp CEXE_headers += AMReX_IParser.H CEXE_sources += AMReX_IParser.cpp +# Dense linear algebra solver using LU decomposition +CEXE_headers += AMReX_LUSolver.H + VPATH_LOCATIONS += $(AMREX_HOME)/Src/Base/Parser INCLUDE_LOCATIONS += $(AMREX_HOME)/Src/Base/Parser diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.H b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.H index 38a0ff5c5c..3e8f06c58e 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.H @@ -3,6 +3,7 @@ #include #include +#include namespace amrex { @@ -12,8 +13,16 @@ namespace amrex { * Here E is an Array of 3 MultiFabs on staggered grid, alpha is a positive * scalar, and beta is a non-negative scalar. * + * It's the caller's responsibility to make sure rhs has consistent nodal + * data. If needed, one could use FabArray::OverrideSync to synchronize + * nodal data. + * + * The smoother is based on the 4-color Gauss-Seidel smoother of Li + * et. al. 2020. "An Efficient Preconditioner for 3-D Finite Difference + * Modeling of the Electromagnetic Diffusion Process in the Frequency + * Domain", IEEE Transactions on Geoscience and Remote Sensing, 58, 500-509. + * * TODO: If beta is zero, the system could be singular. - * TODO: Try different restriction & interpolation strategies. */ class MLCurlCurl : public MLLinOpT > @@ -92,21 +101,20 @@ public: // public for cuda - void smooth (int amrlev, int mglev, MF& sol, MultiFab const& rhs, - int redblack) const; + void smooth4 (int amrlev, int mglev, MF& sol, MF const& rhs, int color) const; void compresid (int amrlev, int mglev, MF& resid, MF const& b) const; - void applyPhysBC (int amrlev, int mglev, MultiFab& mf) const; + void applyPhysBC (int amrlev, int mglev, MultiFab& mf, CurlCurlStateType type) const; private: - void applyBC (int amrlev, int mglev, MF& in) const; - void applyBC (int amrlev, int mglev, MultiFab& mf) const; + void applyBC (int amrlev, int mglev, MF& in, CurlCurlStateType type) const; [[nodiscard]] iMultiFab const& getDotMask (int amrlev, int mglev, int idim) const; - [[nodiscard]] int getDirichlet (int amrlev, int mglev, int idim, int face) const; + [[nodiscard]] CurlCurlDirichletInfo getDirichletInfo (int amrlev, int mglev) const; + [[nodiscard]] CurlCurlSymmetryInfo getSymmetryInfo (int amrlev, int mglev) const; RT m_alpha = std::numeric_limits::lowest(); RT m_beta = std::numeric_limits::lowest(); @@ -118,9 +126,10 @@ private: {IntVect(0,1), IntVect(1,0), IntVect(1,1)}; #endif - Vector,3>>> m_dirichlet_mask; mutable Vector,3>>> m_dotmask; static constexpr int m_ncomp = 1; + Vector>>>> m_lusolver; }; } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.cpp b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.cpp index 11096bbc59..6918dfca96 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.cpp @@ -1,6 +1,4 @@ - #include -#include namespace amrex { @@ -24,9 +22,9 @@ void MLCurlCurl::define (const Vector& a_geom, m_dotmask[amrlev].resize(this->m_num_mg_levels[amrlev]); } - m_dirichlet_mask.resize(this->m_num_amr_levels); + m_lusolver.resize(this->m_num_amr_levels); for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) { - m_dirichlet_mask[amrlev].resize(this->m_num_mg_levels[amrlev]); + m_lusolver[amrlev].resize(this->m_num_mg_levels[amrlev]); } } @@ -48,7 +46,9 @@ void MLCurlCurl::restriction (int amrlev, int cmglev, MF& crse, MF& fine) const IntVect ratio = (amrlev > 0) ? IntVect(2) : this->mg_coarsen_ratio_vec[cmglev-1]; AMREX_ALWAYS_ASSERT(ratio == 2); - applyBC(amrlev, cmglev-1, fine); + applyBC(amrlev, cmglev-1, fine, CurlCurlStateType::r); + + auto dinfo = getDirichletInfo(amrlev,cmglev-1); for (int idim = 0; idim < 3; ++idim) { bool need_parallel_copy = !amrex::isMFIterSafe(crse[idim], fine[idim]); @@ -62,10 +62,9 @@ void MLCurlCurl::restriction (int amrlev, int cmglev, MF& crse, MF& fine) const auto const& crsema = pcrse->arrays(); auto const& finema = fine[idim].const_arrays(); - auto const& dmsk = m_dirichlet_mask[amrlev][cmglev-1][idim]->const_arrays(); ParallelFor(*pcrse, [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k) { - mlcurlcurl_restriction(idim,i,j,k,crsema[bno],finema[bno],dmsk[bno]); + mlcurlcurl_restriction(idim,i,j,k,crsema[bno],finema[bno],dinfo); }); Gpu::streamSynchronize(); @@ -81,6 +80,8 @@ void MLCurlCurl::interpolation (int amrlev, int fmglev, MF& fine, IntVect ratio = (amrlev > 0) ? IntVect(2) : this->mg_coarsen_ratio_vec[fmglev]; AMREX_ALWAYS_ASSERT(ratio == 2); + auto dinfo = getDirichletInfo(amrlev,fmglev); + for (int idim = 0; idim < 3; ++idim) { bool need_parallel_copy = !amrex::isMFIterSafe(crse[idim], fine[idim]); MultiFab cfine; @@ -93,10 +94,9 @@ void MLCurlCurl::interpolation (int amrlev, int fmglev, MF& fine, } auto const& finema = fine[idim].arrays(); auto const& crsema = cmf->const_arrays(); - auto const& dmsk = m_dirichlet_mask[amrlev][fmglev][idim]->const_arrays(); ParallelFor(fine[idim], [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k) { - if (!dmsk[bno](i,j,k)) { + if (!dinfo.is_dirichlet_edge(idim,i,j,k)) { mlcurlcurl_interpadd(idim,i,j,k,finema[bno],crsema[bno]); } }); @@ -108,12 +108,16 @@ void MLCurlCurl::apply (int amrlev, int mglev, MF& out, MF& in, BCMode /*bc_mode*/, StateMode /*s_mode*/, const MLMGBndryT* /*bndry*/) const { - applyBC(amrlev, mglev, in); + applyBC(amrlev, mglev, in, CurlCurlStateType::x); - auto const& dxinv = this->m_geom[amrlev][mglev].InvCellSizeArray(); - auto const a = m_alpha; + auto adxinv = this->m_geom[amrlev][mglev].InvCellSizeArray(); + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + adxinv[idim] *= std::sqrt(m_alpha); + } auto const b = m_beta; + auto dinfo = getDirichletInfo(amrlev,mglev); + #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif @@ -128,32 +132,29 @@ MLCurlCurl::apply (int amrlev, int mglev, MF& out, MF& in, BCMode /*bc_mode*/, auto const& xin = in[0].array(mfi); auto const& yin = in[1].array(mfi); auto const& zin = in[2].array(mfi); - auto const& mx = m_dirichlet_mask[amrlev][mglev][0]->const_array(mfi); - auto const& my = m_dirichlet_mask[amrlev][mglev][1]->const_array(mfi); - auto const& mz = m_dirichlet_mask[amrlev][mglev][2]->const_array(mfi); amrex::ParallelFor(xbx, ybx, zbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) { - if (mx(i,j,k)) { + if (dinfo.is_dirichlet_x_edge(i,j,k)) { xout(i,j,k) = Real(0.0); } else { - mlcurlcurl_adotx_x(i,j,k,xout,xin,yin,zin,a,b,dxinv); + mlcurlcurl_adotx_x(i,j,k,xout,xin,yin,zin,b,adxinv); } }, [=] AMREX_GPU_DEVICE (int i, int j, int k) { - if (my(i,j,k)) { + if (dinfo.is_dirichlet_y_edge(i,j,k)) { yout(i,j,k) = Real(0.0); } else { - mlcurlcurl_adotx_y(i,j,k,yout,xin,yin,zin,a,b,dxinv); + mlcurlcurl_adotx_y(i,j,k,yout,xin,yin,zin,b,adxinv); } }, [=] AMREX_GPU_DEVICE (int i, int j, int k) { - if (mz(i,j,k)) { + if (dinfo.is_dirichlet_z_edge(i,j,k)) { zout(i,j,k) = Real(0.0); } else { - mlcurlcurl_adotx_z(i,j,k,zout,xin,yin,zin,a,b,dxinv); + mlcurlcurl_adotx_z(i,j,k,zout,xin,yin,zin,b,adxinv); } }); } @@ -162,84 +163,54 @@ MLCurlCurl::apply (int amrlev, int mglev, MF& out, MF& in, BCMode /*bc_mode*/, void MLCurlCurl::smooth (int amrlev, int mglev, MF& sol, const MF& rhs, bool skip_fillboundary) const { - if (!skip_fillboundary) { - applyBC(amrlev, mglev, sol); - } - - smooth(amrlev, mglev, sol, rhs[0], 0); // Ex red - applyBC(amrlev, mglev, sol[0]); + AMREX_ASSERT(rhs[0].nGrowVect().allGE(IntVect(1))); - smooth(amrlev, mglev, sol, rhs[1], 0); // Ey red - applyBC(amrlev, mglev, sol[1]); + applyBC(amrlev, mglev, const_cast(rhs), CurlCurlStateType::b); - smooth(amrlev, mglev, sol, rhs[2], 0); // Ez red - applyBC(amrlev, mglev, sol[2]); + for (int color = 0; color < 4; ++color) { + if (!skip_fillboundary) { + applyBC(amrlev, mglev, sol, CurlCurlStateType::x); + } + skip_fillboundary = false; + smooth4(amrlev, mglev, sol, rhs, color); + } +} - smooth(amrlev, mglev, sol, rhs[0], 1); // Ex black - applyBC(amrlev, mglev, sol[0]); +void MLCurlCurl::smooth4 (int amrlev, int mglev, MF& sol, MF const& rhs, + int color) const +{ + auto const& ex = sol[0].arrays(); + auto const& ey = sol[1].arrays(); + auto const& ez = sol[2].arrays(); + auto const& rhsx = rhs[0].const_arrays(); + auto const& rhsy = rhs[1].const_arrays(); + auto const& rhsz = rhs[2].const_arrays(); - smooth(amrlev, mglev, sol, rhs[1], 1); // Ey black -#if (AMREX_SPACEDIM == 3) - applyBC(amrlev, mglev, sol[1]); +#if (AMREX_SPACEDIM == 2) + auto b = m_beta; #endif - smooth(amrlev, mglev, sol, rhs[2], 1); // Ez black - - for (int idim = 0; idim < 3; ++idim) { - amrex::OverrideSync(sol[idim], getDotMask(amrlev,mglev,idim), - this->m_geom[amrlev][mglev].periodicity()); + auto adxinv = this->m_geom[amrlev][mglev].InvCellSizeArray(); + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + adxinv[idim] *= std::sqrt(m_alpha); } -} -void MLCurlCurl::smooth (int amrlev, int mglev, MF& sol, MultiFab const& rhs, - int redblack) const -{ - auto const& dxinv = this->m_geom[amrlev][mglev].InvCellSizeArray(); - auto const a = m_alpha; - auto const b = m_beta; + auto* plusolver = m_lusolver[amrlev][mglev]->dataPtr(); -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) -#endif - for (MFIter mfi(rhs,TilingIfNotGPU()); mfi.isValid(); ++mfi) + auto dinfo = getDirichletInfo(amrlev,mglev); + auto sinfo = getSymmetryInfo(amrlev,mglev); + + MultiFab nmf(amrex::convert(rhs[0].boxArray(),IntVect(1)), + rhs[0].DistributionMap(), 1, 0, MFInfo().SetAlloc(false)); + ParallelFor(nmf, [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k) { - Box const& bx = mfi.tilebox(); - auto const& rh = rhs.const_array(mfi); - if (rhs.ixType() == sol[0].ixType()) { - auto const& ex = sol[0].array(mfi); - auto const& ey = sol[1].const_array(mfi); - auto const& ez = sol[2].const_array(mfi); - auto const& mx = m_dirichlet_mask[amrlev][mglev][0]->const_array(mfi); - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) - { - if (!mx(i,j,k)) { - mlcurlcurl_gsrb_x(i,j,k,ex,ey,ez,rh,a,b,dxinv,redblack); - } - }); - } else if (rhs.ixType() == sol[1].ixType()) { - auto const& ex = sol[0].const_array(mfi); - auto const& ey = sol[1].array(mfi); - auto const& ez = sol[2].const_array(mfi); - auto const& my = m_dirichlet_mask[amrlev][mglev][1]->const_array(mfi); - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) - { - if (!my(i,j,k)) { - mlcurlcurl_gsrb_y(i,j,k,ex,ey,ez,rh,a,b,dxinv,redblack); - } - }); - } else { - auto const& ex = sol[0].const_array(mfi); - auto const& ey = sol[1].const_array(mfi); - auto const& ez = sol[2].array(mfi); - auto const& mz = m_dirichlet_mask[amrlev][mglev][2]->const_array(mfi); - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) - { - if (!mz(i,j,k)) { - mlcurlcurl_gsrb_z(i,j,k,ex,ey,ez,rh,a,b,dxinv,redblack); - } - }); - } - } + mlcurlcurl_gs4(i,j,k,ex[bno],ey[bno],ez[bno],rhsx[bno],rhsy[bno],rhsz[bno], +#if (AMREX_SPACEDIM == 2) + b, +#endif + adxinv,color,*plusolver,dinfo,sinfo); + }); + Gpu::streamSynchronize(); } void MLCurlCurl::solutionResidual (int amrlev, MF& resid, MF& x, const MF& b, @@ -262,6 +233,8 @@ void MLCurlCurl::correctionResidual (int amrlev, int mglev, MF& resid, MF& x, void MLCurlCurl::compresid (int amrlev, int mglev, MF& resid, MF const& b) const { + auto dinfo = getDirichletInfo(amrlev,mglev); + #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif @@ -276,13 +249,10 @@ void MLCurlCurl::compresid (int amrlev, int mglev, MF& resid, MF const& b) const auto const& bx = b[0].array(mfi); auto const& by = b[1].array(mfi); auto const& bz = b[2].array(mfi); - auto const& mx = m_dirichlet_mask[amrlev][mglev][0]->const_array(mfi); - auto const& my = m_dirichlet_mask[amrlev][mglev][1]->const_array(mfi); - auto const& mz = m_dirichlet_mask[amrlev][mglev][2]->const_array(mfi); amrex::ParallelFor(xbx, ybx, zbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) { - if (mx(i,j,k)) { + if (dinfo.is_dirichlet_x_edge(i,j,k)) { resx(i,j,k) = Real(0.0); } else { resx(i,j,k) = bx(i,j,k) - resx(i,j,k); @@ -290,7 +260,7 @@ void MLCurlCurl::compresid (int amrlev, int mglev, MF& resid, MF const& b) const }, [=] AMREX_GPU_DEVICE (int i, int j, int k) { - if (my(i,j,k)) { + if (dinfo.is_dirichlet_y_edge(i,j,k)) { resy(i,j,k) = Real(0.0); } else { resy(i,j,k) = by(i,j,k) - resy(i,j,k); @@ -298,7 +268,7 @@ void MLCurlCurl::compresid (int amrlev, int mglev, MF& resid, MF const& b) const }, [=] AMREX_GPU_DEVICE (int i, int j, int k) { - if (mz(i,j,k)) { + if (dinfo.is_dirichlet_z_edge(i,j,k)) { resz(i,j,k) = Real(0.0); } else { resz(i,j,k) = bz(i,j,k) - resz(i,j,k); @@ -309,50 +279,85 @@ void MLCurlCurl::compresid (int amrlev, int mglev, MF& resid, MF const& b) const void MLCurlCurl::prepareForSolve () { - for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) { + for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) { for (int mglev = 0; mglev < m_num_mg_levels[amrlev]; ++mglev) { - for (int idim = 0; idim < 3; ++idim) { - m_dirichlet_mask[amrlev][mglev][idim] = std::make_unique - (amrex::convert(this->m_grids[amrlev][mglev], m_etype[idim]), - this->m_dmap[amrlev][mglev], 1, 0); - } - - int const dirichlet_xlo = getDirichlet(amrlev, mglev, 0, 0); - int const dirichlet_xhi = getDirichlet(amrlev, mglev, 0, 1); - int const dirichlet_ylo = getDirichlet(amrlev, mglev, 1, 0); - int const dirichlet_yhi = getDirichlet(amrlev, mglev, 1, 1); - int const dirichlet_zlo = getDirichlet(amrlev, mglev, 2, 0); - int const dirichlet_zhi = getDirichlet(amrlev, mglev, 2, 1); - -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) + auto const& dxinv = this->m_geom[amrlev][mglev].InvCellSizeArray(); + Real dxx = dxinv[0]*dxinv[0]; + Real dyy = dxinv[1]*dxinv[1]; + Real dxy = dxinv[0]*dxinv[1]; +#if (AMREX_SPACEDIM == 2) + Array2D A + {m_alpha*dyy*Real(2.0) + m_beta, + Real(0.0), + -m_alpha*dxy, + m_alpha*dxy, + // + Real(0.0), + m_alpha*dyy*Real(2.0) + m_beta, + m_alpha*dxy, + -m_alpha*dxy, + // + -m_alpha*dxy, + m_alpha*dxy, + m_alpha*dxx*Real(2.0) + m_beta, + Real(0.0), + // + m_alpha*dxy, + -m_alpha*dxy, + Real(0.0), + m_alpha*dxx*Real(2.0) + m_beta}; +#else + Real dzz = dxinv[2]*dxinv[2]; + Real dxz = dxinv[0]*dxinv[2]; + Real dyz = dxinv[1]*dxinv[2]; + + Array2D A + {m_alpha*(dyy+dzz)*Real(2.0) + m_beta, + Real(0.0), + -m_alpha*dxy, + m_alpha*dxy, + -m_alpha*dxz, + m_alpha*dxz, + // + Real(0.0), + m_alpha*(dyy+dzz)*Real(2.0) + m_beta, + m_alpha*dxy, + -m_alpha*dxy, + m_alpha*dxz, + -m_alpha*dxz, + // + -m_alpha*dxy, + m_alpha*dxy, + m_alpha*(dxx+dzz)*Real(2.0) + m_beta, + Real(0.0), + -m_alpha*dyz, + m_alpha*dyz, + // + m_alpha*dxy, + -m_alpha*dxy, + Real(0.0), + m_alpha*(dxx+dzz)*Real(2.0) + m_beta, + m_alpha*dyz, + -m_alpha*dyz, + // + -m_alpha*dxz, + m_alpha*dxz, + -m_alpha*dyz, + m_alpha*dyz, + m_alpha*(dxx+dyy)*Real(2.0) + m_beta, + Real(0.0), + // + m_alpha*dxz, + -m_alpha*dxz, + m_alpha*dyz, + -m_alpha*dyz, + Real(0.0), + m_alpha*(dxx+dyy)*Real(2.0) + m_beta}; #endif - for (MFIter mfi(*m_dirichlet_mask[amrlev][mglev][0], - TilingIfNotGPU()); mfi.isValid(); ++mfi) - { - Box const& xbx = mfi.tilebox(m_etype[0]); - Box const& ybx = mfi.tilebox(m_etype[1]); - Box const& zbx = mfi.tilebox(m_etype[2]); - auto const& mx = m_dirichlet_mask[amrlev][mglev][0]->array(mfi); - auto const& my = m_dirichlet_mask[amrlev][mglev][1]->array(mfi); - auto const& mz = m_dirichlet_mask[amrlev][mglev][2]->array(mfi); - amrex::ParallelFor(xbx, ybx, zbx, - [=] AMREX_GPU_DEVICE (int i, int j, int k) - { - mx(i,j,k) = int(j == dirichlet_ylo || j == dirichlet_yhi || - k == dirichlet_zlo || k == dirichlet_zhi); - }, - [=] AMREX_GPU_DEVICE (int i, int j, int k) - { - my(i,j,k) = int(i == dirichlet_xlo || i == dirichlet_xhi || - k == dirichlet_zlo || k == dirichlet_zhi); - }, - [=] AMREX_GPU_DEVICE (int i, int j, int k) - { - mz(i,j,k) = (i == dirichlet_xlo || i == dirichlet_xhi || - j == dirichlet_ylo || j == dirichlet_yhi); - }); - } + + m_lusolver[amrlev][mglev] + = std::make_unique>>(A); } } } @@ -446,21 +451,24 @@ MLCurlCurl::makeCoarseAmr (int famrlev, IntVect const& ng) const return r; } -void MLCurlCurl::applyBC (int amrlev, int mglev, MF& in) const +void MLCurlCurl::applyBC (int amrlev, int mglev, MF& in, CurlCurlStateType type) const { - Vector mfs{in.data(),&(in[1]),&(in[2])}; + int nmfs = 3; +#if (AMREX_SPACEDIM == 2) + if (CurlCurlStateType::b == type) { + nmfs = 2; // no need to applyBC on Ez + } +#endif + Vector mfs(nmfs); + for (int imf = 0; imf < nmfs; ++imf) { + mfs[imf] = in.data() + imf; + } FillBoundary(mfs, this->m_geom[amrlev][mglev].periodicity()); - for (auto& mf : in) { - applyPhysBC(amrlev, mglev, mf); + for (auto* mf : mfs) { + applyPhysBC(amrlev, mglev, *mf, type); } } -void MLCurlCurl::applyBC (int amrlev, int mglev, MultiFab& mf) const -{ - mf.FillBoundary(this->m_geom[amrlev][mglev].periodicity()); - applyPhysBC(amrlev, mglev, mf); -} - #ifdef AMREX_USE_GPU struct MLCurlCurlBCTag { Array4 fab; @@ -470,12 +478,25 @@ struct MLCurlCurlBCTag { [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const& box() const noexcept { return bx; } }; + +struct MLCurlCurlEdgeBCTag { + Array4 fab; + Box bx; + Dim3 offset; + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + Box const& box() const noexcept { return bx; } +}; #endif -void MLCurlCurl::applyPhysBC (int amrlev, int mglev, MultiFab& mf) const +void MLCurlCurl::applyPhysBC (int amrlev, int mglev, MultiFab& mf, CurlCurlStateType type) const { + if (CurlCurlStateType::b == type) { return; } + auto const idxtype = mf.ixType(); Box const domain = amrex::convert(this->m_geom[amrlev][mglev].Domain(), idxtype); + Box const gdomain = amrex::convert + (this->m_geom[amrlev][mglev].growPeriodicDomain(1), idxtype); MFItInfo mfi_info{}; @@ -496,10 +517,24 @@ void MLCurlCurl::applyPhysBC (int amrlev, int mglev, MultiFab& mf) const bool is_symmetric = face.isLow() ? m_lobc[0][idim] == LinOpBCType::symmetry : m_hibc[0][idim] == LinOpBCType::symmetry; - if (domain[face] == vbx[face] && is_symmetric) { + if (domain[face] == vbx[face] && is_symmetric && + ((type == CurlCurlStateType::x) || + (type == CurlCurlStateType::r && idxtype.nodeCentered(idim)))) // transverse direction only + { Box b = vbx; - int shift = face.isLow() ? -1 : 1; - b.setRange(idim, domain[face] + shift, 1); + for (int jdim = 0; jdim < AMREX_SPACEDIM; ++jdim) { + if (jdim == idim) { + int shift = face.isLow() ? -1 : 1; + b.setRange(jdim, domain[face] + shift, 1); + } else { + if (b.smallEnd(jdim) > gdomain.smallEnd(jdim)) { + b.growLo(jdim); + } + if (b.bigEnd(jdim) < gdomain.bigEnd(jdim)) { + b.growHi(jdim); + } + } + } #ifdef AMREX_USE_GPU tags.emplace_back(MLCurlCurlBCTag{a,b,face}); #else @@ -519,6 +554,72 @@ void MLCurlCurl::applyPhysBC (int amrlev, int mglev, MultiFab& mf) const mlcurlcurl_bc_symmetry(i, j, k, tag.face, idxtype, tag.fab); }); #endif + + if (CurlCurlStateType::r == type) { // fix domain edges + auto sinfo = getSymmetryInfo(amrlev,mglev); + +#ifdef AMREX_USE_GPU + Vector tags2; +#endif + +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(mf,mfi_info); mfi.isValid(); ++mfi) { + auto const& vbx = mfi.validbox(); + auto const& a = mf.array(mfi); + for (int idim = 0; idim < AMREX_SPACEDIM-1; ++idim) { + for (int jdim = idim+1; jdim < AMREX_SPACEDIM; ++jdim) { + if (idxtype.nodeCentered(idim) && + idxtype.nodeCentered(jdim)) + { + for (int iside = 0; iside < 2; ++iside) { + int ii = (iside == 0) ? vbx.smallEnd(idim) : vbx.bigEnd(idim); + for (int jside = 0; jside < 2; ++jside) { + int jj = (jside == 0) ? vbx.smallEnd(jdim) : vbx.bigEnd(jdim); + if (sinfo.is_symmetric(idim,iside,ii) && + sinfo.is_symmetric(jdim,jside,jj)) + { + IntVect oiv(0); + oiv[idim] = (iside == 0) ? 2 : -2; + oiv[jdim] = (jside == 0) ? 2 : -2; + Dim3 offset = oiv.dim3(); + + Box b = vbx; + if (iside == 0) { + b.setRange(idim,vbx.smallEnd(idim)-1); + } else { + b.setRange(idim,vbx.bigEnd(idim)+1); + } + if (jside == 0) { + b.setRange(jdim,vbx.smallEnd(jdim)-1); + } else { + b.setRange(jdim,vbx.bigEnd(jdim)+1); + } +#ifdef AMREX_USE_GPU + tags2.emplace_back(MLCurlCurlEdgeBCTag{a,b,offset}); +#else + amrex::LoopOnCpu(b, [&] (int i, int j, int k) + { + a(i,j,k) = a(i+offset.x,j+offset.y,k+offset.z); + }); +#endif + } + } + } + } + } + } + } + +#ifdef AMREX_USE_GPU + ParallelFor(tags2, + [=] AMREX_GPU_DEVICE (int i, int j, int k, MLCurlCurlEdgeBCTag const& tag) + { + tag.fab(i,j,k) = tag.fab(i+tag.offset.x,j+tag.offset.y,k+tag.offset.z); + }); +#endif + } } iMultiFab const& MLCurlCurl::getDotMask (int amrlev, int mglev, int idim) const @@ -532,27 +633,72 @@ iMultiFab const& MLCurlCurl::getDotMask (int amrlev, int mglev, int idim) const return *m_dotmask[amrlev][mglev][idim]; } -int MLCurlCurl::getDirichlet (int amrlev, int mglev, int idim, int face) const +CurlCurlDirichletInfo MLCurlCurl::getDirichletInfo (int amrlev, int mglev) const { + + auto helper = [&] (int idim, int face) -> int + { #if (AMREX_SPACEDIM == 2) - if (idim == 2) { - return std::numeric_limits::lowest(); - } + if (idim == 2) { + return std::numeric_limits::lowest(); + } #endif - if (face == 0) { - if (m_lobc[0][idim] == LinOpBCType::Dirichlet) { - return m_geom[amrlev][mglev].Domain().smallEnd(idim); + if (face == 0) { + if (m_lobc[0][idim] == LinOpBCType::Dirichlet) { + return m_geom[amrlev][mglev].Domain().smallEnd(idim); + } else { + return std::numeric_limits::lowest(); + } } else { + if (m_hibc[0][idim] == LinOpBCType::Dirichlet) { + return m_geom[amrlev][mglev].Domain().bigEnd(idim) + 1; + } else { + return std::numeric_limits::max(); + } + } + }; + + return CurlCurlDirichletInfo{IntVect(AMREX_D_DECL(helper(0,0), + helper(1,0), + helper(2,0))), + IntVect(AMREX_D_DECL(helper(0,1), + helper(1,1), + helper(2,1)))}; +} + +CurlCurlSymmetryInfo MLCurlCurl::getSymmetryInfo (int amrlev, int mglev) const +{ + + auto helper = [&] (int idim, int face) -> int + { +#if (AMREX_SPACEDIM == 2) + if (idim == 2) { return std::numeric_limits::lowest(); } - } else { - if (m_hibc[0][idim] == LinOpBCType::Dirichlet) { - return m_geom[amrlev][mglev].Domain().bigEnd(idim) + 1; +#endif + + if (face == 0) { + if (m_lobc[0][idim] == LinOpBCType::symmetry) { + return m_geom[amrlev][mglev].Domain().smallEnd(idim); + } else { + return std::numeric_limits::lowest(); + } } else { - return std::numeric_limits::max(); + if (m_hibc[0][idim] == LinOpBCType::symmetry) { + return m_geom[amrlev][mglev].Domain().bigEnd(idim) + 1; + } else { + return std::numeric_limits::max(); + } } - } + }; + + return CurlCurlSymmetryInfo{IntVect(AMREX_D_DECL(helper(0,0), + helper(1,0), + helper(2,0))), + IntVect(AMREX_D_DECL(helper(0,1), + helper(1,1), + helper(2,1)))}; } } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl_K.H b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl_K.H index 9c69336f81..0816d14118 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl_K.H @@ -3,6 +3,7 @@ #include #include +#include namespace amrex { @@ -15,18 +16,309 @@ namespace amrex { * (curl E)_z : (0,0,1) */ +/* + Notes for gs4: + + Interior nodes: + + For v = [Ex(i-1,j,k), Ex(i,j,k), Ey(i,j-1,k), Ey(i,j,k), Ez(i,j,k-1), Ez(i,j,k)]^T, + we have A*v = b, where + + A00 = alpha*(dyy+dzz)*2 + beta + A01 = 0 + A02 = -alpha*dxy + A03 = alpha*dxy + A04 = -alpha*dxz + A05 = alpha*dxz + + A10 = 0 + A11 = alpha*(dyy+dzz)*2 + beta + A12 = alpha*dxy + A13 = -alpha*dxy + A14 = alpha*dxz + A15 = -alpha*dxz + + A20 = -alpha*dxy + A21 = alpha*dxy + A22 = alpha*(dxx+dzz)*2 + beta + A23 = 0 + A24 = -alpha*dyz + A25 = alpha*dyz + + A30 = alpha*dxy + A31 = -alpha*dxy + A32 = 0 + A33 = alpha*(dxx+dzz)*2 + beta + A34 = alpha*dyz + A35 = -alpha*dyz + + A40 = -alpha*dxz + A41 = alpha*dxz + A42 = -alpha*dyz + A43 = alpha*dyz + A44 = alpha*(dxx+dyy)*2 + beta + A45 = 0 + + A50 = alpha*dxz + A51 = -alpha*dxz + A52 = alpha*dyz + A53 = -alpha*dyz + A54 = 0 + A55 = alpha*(dxx+dyy)*2 + beta + + b0 = rhsx(i-1,j,k) - (alpha*ccex), where + ccex = - dyy * (ex(i-1,j-1,k ) + + ex(i-1,j+1,k )) + - dzz * (ex(i-1,j ,k+1) + + ex(i-1,j ,k-1)) + + dxy * (ey(i-1,j-1,k ) + - ey(i-1,j ,k )) + + dxz * (ez(i-1,j ,k-1) + - ez(i-1,j ,k )) + b1 = rhsx(i,j,k) - (alpha*ccex), where + ccex = - dyy * ( ex(i ,j-1,k ) + + ex(i ,j+1,k )) + - dzz * ( ex(i ,j ,k+1) + + ex(i ,j ,k-1)) + + dxy * (-ey(i+1,j-1,k ) + + ey(i+1,j ,k )) + + dxz * (-ez(i+1,j ,k-1) + + ez(i+1,j ,k )); + b2 = rhsy(i,j-1,k) - alpha*ccey, where + ccey = - dxx * (ey(i-1,j-1,k ) + + ey(i+1,j-1,k )) + - dzz * (ey(i ,j-1,k-1) + + ey(i ,j-1,k+1)) + + dxy * (ex(i-1,j-1,k ) + - ex(i ,j-1,k )) + + dyz * (ez(i ,j-1,k-1) + - ez(i ,j-1,k )) + b3 = rhsy(i,j,k) - alpha*ccey, where + ccey = - dxx * ( ey(i-1,j ,k ) + + ey(i+1,j ,k )) + - dzz * ( ey(i ,j ,k-1) + + ey(i ,j ,k+1)) + + dxy * (-ex(i-1,j+1,k ) + + ex(i ,j+1,k )) + + dyz * (-ez(i ,j+1,k-1) + + ez(i ,j+1,k )); + b4 = rhsz(i,j,k-1) - alpha*ccez, where + ccez = - dxx * (ez(i-1,j ,k-1) + + ez(i+1,j ,k-1)) + - dyy * (ez(i ,j-1,k-1) + + ez(i ,j+1,k-1)) + + dxz * (ex(i-1,j ,k-1) + - ex(i ,j ,k-1)) + + dyz * (ey(i ,j-1,k-1) + - ey(i ,j ,k-1)) + b5 = rhsz(i,j,k) - alpha*ccez, where + ccez = - dxx * ( ez(i-1,j ,k ) + + ez(i+1,j ,k )) + - dyy * ( ez(i ,j-1,k ) + + ez(i ,j+1,k )) + + dxz * (-ex(i-1,j ,k+1) + + ex(i ,j ,k+1)) + + dyz * (-ey(i ,j-1,k+1) + + ey(i ,j ,k+1)); + + dxx = 1/(dx*dx) + dyy = 1/(dy*dy) + dzz = 1/(dz*dz) + dxy = 1/(dx*dy) + dxz = 1/(dx*dz) + dyz = 1/(dy*dz) + + For Dirichlet boundary nodes, we don't do anything. + + For symmetric boundary nodes, we treat it as interior nodes because the + rhs outside the domain has been filled properly. + + In 2D, + + For v = [Ex(i-1,j,k), Ex(i,j,k), Ey(i,j-1,k), Ey(i,j,k)]^T, + we have A*v = b, where + + A00 = alpha*dyy*2 + beta + A01 = 0 + A02 = -alpha*dxy + A03 = alpha*dxy + + A10 = 0 + A11 = alpha*dyy*2 + beta + A12 = alpha*dxy + A13 = -alpha*dxy + + A20 = -alpha*dxy + A21 = alpha*dxy + A22 = alpha*dxx*2 + beta + A23 = 0 + + A30 = alpha*dxy + A31 = -alpha*dxy + A32 = 0 + A33 = alpha*dxx*2 + beta + + b0 = rhsx(i-1,j,k) - (alpha*ccex), where + ccex = - dyy * (ex(i-1,j-1,k ) + + ex(i-1,j+1,k )) + + dxy * (ey(i-1,j-1,k ) + - ey(i-1,j ,k )) + b1 = rhsx(i,j,k) - (alpha*ccex), where + ccex = - dyy * ( ex(i ,j-1,k ) + + ex(i ,j+1,k )) + + dxy * (-ey(i+1,j-1,k ) + + ey(i+1,j ,k )) + b2 = rhsy(i,j-1,k) - alpha*ccey, where + ccey = - dxx * (ey(i-1,j-1,k ) + + ey(i+1,j-1,k )) + + dxy * (ex(i-1,j-1,k ) + - ex(i ,j-1,k )) + b3 = rhsy(i,j,k) - alpha*ccey, where + ccey = - dxx * ( ey(i-1,j ,k ) + + ey(i+1,j ,k )) + + dxy * (-ex(i-1,j+1,k ) + + ex(i ,j+1,k )) +*/ + +struct CurlCurlDirichletInfo +{ + IntVect dirichlet_lo; + IntVect dirichlet_hi; + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + bool is_dirichlet_node (int i, int j, int k) const + { +#if (AMREX_SPACEDIM == 2) + amrex::ignore_unused(k); + return (i == dirichlet_lo[0]) || (i == dirichlet_hi[0]) + || (j == dirichlet_lo[1]) || (j == dirichlet_hi[1]); +#else + return (i == dirichlet_lo[0]) || (i == dirichlet_hi[0]) + || (j == dirichlet_lo[1]) || (j == dirichlet_hi[1]) + || (k == dirichlet_lo[2]) || (k == dirichlet_hi[2]); +#endif + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + bool is_dirichlet_x_edge (int, int j, int k) const + { +#if (AMREX_SPACEDIM == 2) + amrex::ignore_unused(k); + return (j == dirichlet_lo[1]) || (j == dirichlet_hi[1]); +#else + return (j == dirichlet_lo[1]) || (j == dirichlet_hi[1]) + || (k == dirichlet_lo[2]) || (k == dirichlet_hi[2]); +#endif + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + bool is_dirichlet_y_edge (int i, int, int k) const + { +#if (AMREX_SPACEDIM == 2) + amrex::ignore_unused(k); + return (i == dirichlet_lo[0]) || (i == dirichlet_hi[0]); +#else + return (i == dirichlet_lo[0]) || (i == dirichlet_hi[0]) + || (k == dirichlet_lo[2]) || (k == dirichlet_hi[2]); +#endif + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + bool is_dirichlet_z_edge (int i, int j, int) const + { + return (i == dirichlet_lo[0]) || (i == dirichlet_hi[0]) + || (j == dirichlet_lo[1]) || (j == dirichlet_hi[1]); + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + bool is_dirichlet_edge (int dim, int i, int j, int k) const + { + if (dim == 0) { + return is_dirichlet_x_edge(i,j,k); + } else if (dim == 1) { + return is_dirichlet_y_edge(i,j,k); + } else { + return is_dirichlet_z_edge(i,j,k); + } + } +}; + +struct CurlCurlSymmetryInfo +{ + IntVect symmetry_lo; + IntVect symmetry_hi; + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + bool xlo_is_symmetric (int i) const + { + return i == symmetry_lo[0]; + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + bool xhi_is_symmetric (int i) const + { + return i == symmetry_hi[0]; + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + bool ylo_is_symmetric (int j) const + { + return j == symmetry_lo[1]; + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + bool yhi_is_symmetric (int j) const + { + return j == symmetry_hi[1]; + } + +#if (AMREX_SPACEDIM == 3) + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + bool zlo_is_symmetric (int k) const + { + return k == symmetry_lo[2]; + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + bool zhi_is_symmetric (int k) const + { + return k == symmetry_hi[2]; + } +#endif + + [[nodiscard]] bool is_symmetric (int dir, int side, int idx) const + { +#if (AMREX_SPACEDIM == 2) + if (dir == 0) { + return (side == 0) ? xlo_is_symmetric(idx) : xhi_is_symmetric(idx); + } else { + return (side == 0) ? ylo_is_symmetric(idx) : yhi_is_symmetric(idx); + } +#else + if (dir == 0) { + return (side == 0) ? xlo_is_symmetric(idx) : xhi_is_symmetric(idx); + } else if (dir == 1) { + return (side == 0) ? ylo_is_symmetric(idx) : yhi_is_symmetric(idx); + } else { + return (side == 0) ? zlo_is_symmetric(idx) : zhi_is_symmetric(idx); + } +#endif + } +}; + +enum struct CurlCurlStateType { x, b, r }; // x, b & r=b-Ax + AMREX_GPU_DEVICE AMREX_FORCE_INLINE void mlcurlcurl_adotx_x (int i, int j, int k, Array4 const& Ax, Array4 const& ex, Array4 const& ey, Array4 const& ez, - Real alpha, Real beta, - GpuArray const& dxinv) + Real beta, GpuArray const& adxinv) { #if (AMREX_SPACEDIM == 2) amrex::ignore_unused(ez); - Real dyy = dxinv[1] * dxinv[1]; - Real dxy = dxinv[0] * dxinv[1]; + Real dyy = adxinv[1] * adxinv[1]; + Real dxy = adxinv[0] * adxinv[1]; Real ccex = ex(i ,j ,k ) * dyy * Real(2.0) - dyy * (ex(i ,j-1,k ) + ex(i ,j+1,k )) @@ -35,10 +327,10 @@ void mlcurlcurl_adotx_x (int i, int j, int k, Array4 const& Ax, - ey(i+1,j-1,k ) + ey(i+1,j ,k )); #else - Real dyy = dxinv[1] * dxinv[1]; - Real dzz = dxinv[2] * dxinv[2]; - Real dxy = dxinv[0] * dxinv[1]; - Real dxz = dxinv[0] * dxinv[2]; + Real dyy = adxinv[1] * adxinv[1]; + Real dzz = adxinv[2] * adxinv[2]; + Real dxy = adxinv[0] * adxinv[1]; + Real dxz = adxinv[0] * adxinv[2]; Real ccex = ex(i ,j ,k ) * (dyy+dzz)*Real(2.0) - dyy * (ex(i ,j-1,k ) + ex(i ,j+1,k )) @@ -53,7 +345,7 @@ void mlcurlcurl_adotx_x (int i, int j, int k, Array4 const& Ax, - ez(i+1,j ,k-1) + ez(i+1,j ,k )); #endif - Ax(i,j,k) = alpha * ccex + beta * ex(i,j,k); + Ax(i,j,k) = ccex + beta * ex(i,j,k); } AMREX_GPU_DEVICE AMREX_FORCE_INLINE @@ -61,13 +353,12 @@ void mlcurlcurl_adotx_y (int i, int j, int k, Array4 const& Ay, Array4 const& ex, Array4 const& ey, Array4 const& ez, - Real alpha, Real beta, - GpuArray const& dxinv) + Real beta, GpuArray const& adxinv) { #if (AMREX_SPACEDIM == 2) amrex::ignore_unused(ez); - Real dxx = dxinv[0] * dxinv[0]; - Real dxy = dxinv[0] * dxinv[1]; + Real dxx = adxinv[0] * adxinv[0]; + Real dxy = adxinv[0] * adxinv[1]; Real ccey = ey(i ,j ,k ) * dxx * Real(2.0) - dxx * (ey(i-1,j ,k ) + ey(i+1,j ,k )) @@ -76,10 +367,10 @@ void mlcurlcurl_adotx_y (int i, int j, int k, Array4 const& Ay, - ex(i-1,j+1,k ) + ex(i ,j+1,k )); #else - Real dxx = dxinv[0] * dxinv[0]; - Real dzz = dxinv[2] * dxinv[2]; - Real dxy = dxinv[0] * dxinv[1]; - Real dyz = dxinv[1] * dxinv[2]; + Real dxx = adxinv[0] * adxinv[0]; + Real dzz = adxinv[2] * adxinv[2]; + Real dxy = adxinv[0] * adxinv[1]; + Real dyz = adxinv[1] * adxinv[2]; Real ccey = ey(i ,j ,k ) * (dxx+dzz)*Real(2.0) - dxx * (ey(i-1,j ,k ) + ey(i+1,j ,k )) @@ -94,7 +385,7 @@ void mlcurlcurl_adotx_y (int i, int j, int k, Array4 const& Ay, - ez(i ,j+1,k-1) + ez(i ,j+1,k )); #endif - Ay(i,j,k) = alpha * ccey + beta * ey(i,j,k); + Ay(i,j,k) = ccey + beta * ey(i,j,k); } AMREX_GPU_DEVICE AMREX_FORCE_INLINE @@ -102,23 +393,22 @@ void mlcurlcurl_adotx_z (int i, int j, int k, Array4 const& Az, Array4 const& ex, Array4 const& ey, Array4 const& ez, - Real alpha, Real beta, - GpuArray const& dxinv) + Real beta, GpuArray const& adxinv) { #if (AMREX_SPACEDIM == 2) amrex::ignore_unused(ex,ey); - Real dxx = dxinv[0] * dxinv[0]; - Real dyy = dxinv[1] * dxinv[1]; + Real dxx = adxinv[0] * adxinv[0]; + Real dyy = adxinv[1] * adxinv[1]; Real ccez = ez(i ,j ,k ) * (dxx+dyy)*Real(2.0) - dxx * (ez(i-1,j ,k ) + ez(i+1,j ,k )) - dyy * (ez(i ,j-1,k ) + ez(i ,j+1,k )); #else - Real dxx = dxinv[0] * dxinv[0]; - Real dyy = dxinv[1] * dxinv[1]; - Real dxz = dxinv[0] * dxinv[2]; - Real dyz = dxinv[1] * dxinv[2]; + Real dxx = adxinv[0] * adxinv[0]; + Real dyy = adxinv[1] * adxinv[1]; + Real dxz = adxinv[0] * adxinv[2]; + Real dyz = adxinv[1] * adxinv[2]; Real ccez = ez(i ,j ,k ) * (dxx+dyy)*Real(2.0) - dxx * (ez(i-1,j ,k ) + ez(i+1,j ,k )) @@ -133,152 +423,179 @@ void mlcurlcurl_adotx_z (int i, int j, int k, Array4 const& Az, - ey(i ,j-1,k+1) + ey(i ,j ,k+1)); #endif - Az(i,j,k) = alpha * ccez + beta * ez(i,j,k); + Az(i,j,k) = ccez + beta * ez(i,j,k); } AMREX_GPU_DEVICE AMREX_FORCE_INLINE -void mlcurlcurl_gsrb_x (int i, int j, int k, - Array4 const& ex, - Array4 const& ey, - Array4 const& ez, - Array4 const& rhs, - Real alpha, Real beta, - GpuArray const& dxinv, int redblack) +void mlcurlcurl_gs4 (int i, int j, int k, + Array4 const& ex, + Array4 const& ey, + Array4 const& ez, + Array4 const& rhsx, + Array4 const& rhsy, + Array4 const& rhsz, +#if (AMREX_SPACEDIM == 2) + Real beta, +#endif + GpuArray const& adxinv, + int color, LUSolver const& lusolver, + CurlCurlDirichletInfo const& dinfo, + CurlCurlSymmetryInfo const& sinfo) { - constexpr Real omega = Real(1.15); + if (dinfo.is_dirichlet_node(i,j,k)) { return; } - if ((i+j+k+redblack) % 2 != 0) { return; } + int my_color = i%2 + 2*(j%2); + if (k%2 != 0) { + my_color = 3 - my_color; + } #if (AMREX_SPACEDIM == 2) - amrex::ignore_unused(ez); - Real dyy = dxinv[1] * dxinv[1]; - Real dxy = dxinv[0] * dxinv[1]; - Real gamma = alpha * (dyy)*Real(2.0) + beta; - Real ccex = - - dyy * (ex(i ,j-1,k ) + - ex(i ,j+1,k )) - + dxy * (ey(i ,j-1,k ) - - ey(i ,j ,k ) - - ey(i+1,j-1,k ) - + ey(i+1,j ,k )); -#else - Real dyy = dxinv[1] * dxinv[1]; - Real dzz = dxinv[2] * dxinv[2]; - Real dxy = dxinv[0] * dxinv[1]; - Real dxz = dxinv[0] * dxinv[2]; - Real gamma = alpha * (dyy+dzz)*Real(2.0) + beta; - Real ccex = - - dyy * (ex(i ,j-1,k ) + - ex(i ,j+1,k )) - - dzz * (ex(i ,j ,k+1) + - ex(i ,j ,k-1)) - + dxy * (ey(i ,j-1,k ) - - ey(i ,j ,k ) - - ey(i+1,j-1,k ) - + ey(i+1,j ,k )) - + dxz * (ez(i ,j ,k-1) - - ez(i ,j ,k ) - - ez(i+1,j ,k-1) - + ez(i+1,j ,k )); -#endif - Real res = rhs(i,j,k) - (gamma*ex(i,j,k) + alpha*ccex); - ex(i,j,k) += omega/gamma * res; -} -AMREX_GPU_DEVICE AMREX_FORCE_INLINE -void mlcurlcurl_gsrb_y (int i, int j, int k, - Array4 const& ex, - Array4 const& ey, - Array4 const& ez, - Array4 const& rhs, - Real alpha, Real beta, - GpuArray const& dxinv, int redblack) -{ - constexpr Real omega = Real(1.15); + Real dxx = adxinv[0] * adxinv[0]; + Real dyy = adxinv[1] * adxinv[1]; - if ((i+j+k+redblack) % 2 != 0) { return; } + if (((my_color == 0 || my_color == 3) && (color == 0 || color == 3)) || + ((my_color == 1 || my_color == 2) && (color == 1 || color == 2))) + { + Real gamma = (dxx+dyy)*Real(2.0) + beta; + Real ccez = - dxx * (ez(i-1,j ,k ) + + ez(i+1,j ,k )) + - dyy * (ez(i ,j-1,k ) + + ez(i ,j+1,k )); + Real res = rhsz(i,j,k) - (gamma*ez(i,j,k) + ccez); + constexpr Real omega = Real(1.15); + ez(i,j,k) += omega/gamma * res; + } -#if (AMREX_SPACEDIM == 2) - amrex::ignore_unused(ez); - Real dxx = dxinv[0] * dxinv[0]; - Real dxy = dxinv[0] * dxinv[1]; - Real gamma = alpha * (dxx)*Real(2.0) + beta; - Real ccey = - - dxx * (ey(i-1,j ,k ) + - ey(i+1,j ,k )) - + dxy * (ex(i-1,j ,k ) - - ex(i ,j ,k ) - - ex(i-1,j+1,k ) - + ex(i ,j+1,k )); -#else - Real dxx = dxinv[0] * dxinv[0]; - Real dzz = dxinv[2] * dxinv[2]; - Real dxy = dxinv[0] * dxinv[1]; - Real dyz = dxinv[1] * dxinv[2]; - Real gamma = alpha * (dxx+dzz)*Real(2.0) + beta; - Real ccey = - - dxx * (ey(i-1,j ,k ) + - ey(i+1,j ,k )) - - dzz * (ey(i ,j ,k-1) + - ey(i ,j ,k+1)) - + dxy * (ex(i-1,j ,k ) - - ex(i ,j ,k ) - - ex(i-1,j+1,k ) - + ex(i ,j+1,k )) - + dyz * (ez(i ,j ,k-1) - - ez(i ,j ,k ) - - ez(i ,j+1,k-1) - + ez(i ,j+1,k )); -#endif - Real res = rhs(i,j,k) - (gamma*ey(i,j,k) + alpha*ccey); - ey(i,j,k) += omega/gamma * res; -} + if (my_color != color) { return; } -AMREX_GPU_DEVICE AMREX_FORCE_INLINE -void mlcurlcurl_gsrb_z (int i, int j, int k, - Array4 const& ex, - Array4 const& ey, - Array4 const& ez, - Array4 const& rhs, - Real alpha, Real beta, - GpuArray const& dxinv, int redblack) -{ - constexpr Real omega = Real(1.15); + Real dxy = adxinv[0]*adxinv[1]; - if ((i+j+k+redblack) % 2 != 0) { return; } + GpuArray b + {rhsx(i-1,j,k) - (-dyy * ( ex(i-1,j-1,k ) + + ex(i-1,j+1,k )) + + dxy * ( ey(i-1,j-1,k ) + -ey(i-1,j ,k ))), + rhsx(i ,j,k) - (-dyy * ( ex(i ,j-1,k ) + + ex(i ,j+1,k )) + + dxy * (-ey(i+1,j-1,k ) + +ey(i+1,j ,k ))), + rhsy(i,j-1,k) - (-dxx * ( ey(i-1,j-1,k ) + + ey(i+1,j-1,k )) + + dxy * ( ex(i-1,j-1,k ) + -ex(i ,j-1,k ))), + rhsy(i,j ,k) - (-dxx * ( ey(i-1,j ,k ) + + ey(i+1,j ,k )) + + dxy * (-ex(i-1,j+1,k ) + +ex(i ,j+1,k )))}; + + if (sinfo.xlo_is_symmetric(i)) { + b[0] = -b[1]; + } else if (sinfo.xhi_is_symmetric(i)) { + b[1] = -b[0]; + } + + if (sinfo.ylo_is_symmetric(j)) { + b[2] = -b[3]; + } else if (sinfo.yhi_is_symmetric(j)) { + b[3] = -b[2]; + } + + GpuArray x; + lusolver(x.data(), b.data()); + ex(i-1,j ,k ) = x[0]; + ex(i ,j ,k ) = x[1]; + ey(i ,j-1,k ) = x[2]; + ey(i ,j ,k ) = x[3]; -#if (AMREX_SPACEDIM == 2) - amrex::ignore_unused(ex,ey); - Real dxx = dxinv[0] * dxinv[0]; - Real dyy = dxinv[1] * dxinv[1]; - Real gamma = alpha * (dxx+dyy)*Real(2.0) + beta; - Real ccez = - - dxx * (ez(i-1,j ,k ) + - ez(i+1,j ,k )) - - dyy * (ez(i ,j-1,k ) + - ez(i ,j+1,k )); #else - Real dxx = dxinv[0] * dxinv[0]; - Real dyy = dxinv[1] * dxinv[1]; - Real dxz = dxinv[0] * dxinv[2]; - Real dyz = dxinv[1] * dxinv[2]; - Real gamma = alpha * (dxx+dyy)*Real(2.0) + beta; - Real ccez = - - dxx * (ez(i-1,j ,k ) + - ez(i+1,j ,k )) - - dyy * (ez(i ,j-1,k ) + - ez(i ,j+1,k )) - + dxz * (ex(i-1,j ,k ) - - ex(i ,j ,k ) - - ex(i-1,j ,k+1) - + ex(i ,j ,k+1)) - + dyz * (ey(i ,j-1,k ) - - ey(i ,j ,k ) - - ey(i ,j-1,k+1) - + ey(i ,j ,k+1)); + + if (my_color != color) { return; } + + Real dxx = adxinv[0]*adxinv[0]; + Real dyy = adxinv[1]*adxinv[1]; + Real dzz = adxinv[2]*adxinv[2]; + Real dxy = adxinv[0]*adxinv[1]; + Real dxz = adxinv[0]*adxinv[2]; + Real dyz = adxinv[1]*adxinv[2]; + + GpuArray b + {rhsx(i-1,j,k) - (-dyy * ( ex(i-1,j-1,k ) + + ex(i-1,j+1,k )) + - dzz * ( ex(i-1,j ,k+1) + + ex(i-1,j ,k-1)) + + dxy * ( ey(i-1,j-1,k ) + -ey(i-1,j ,k )) + + dxz * ( ez(i-1,j ,k-1) + -ez(i-1,j ,k ))), + rhsx(i ,j,k) - (-dyy * ( ex(i ,j-1,k ) + + ex(i ,j+1,k )) + - dzz * ( ex(i ,j ,k+1) + + ex(i ,j ,k-1)) + + dxy * (-ey(i+1,j-1,k ) + +ey(i+1,j ,k )) + + dxz * (-ez(i+1,j ,k-1) + +ez(i+1,j ,k ))), + rhsy(i,j-1,k) - (-dxx * ( ey(i-1,j-1,k ) + + ey(i+1,j-1,k )) + - dzz * ( ey(i ,j-1,k-1) + + ey(i ,j-1,k+1)) + + dxy * ( ex(i-1,j-1,k ) + -ex(i ,j-1,k )) + + dyz * ( ez(i ,j-1,k-1) + -ez(i ,j-1,k ))), + rhsy(i,j ,k) - (-dxx * ( ey(i-1,j ,k ) + + ey(i+1,j ,k )) + - dzz * ( ey(i ,j ,k-1) + + ey(i ,j ,k+1)) + + dxy * (-ex(i-1,j+1,k ) + +ex(i ,j+1,k )) + + dyz * (-ez(i ,j+1,k-1) + +ez(i ,j+1,k ))), + rhsz(i,j,k-1) - (-dxx * ( ez(i-1,j ,k-1) + + ez(i+1,j ,k-1)) + - dyy * ( ez(i ,j-1,k-1) + + ez(i ,j+1,k-1)) + + dxz * ( ex(i-1,j ,k-1) + -ex(i ,j ,k-1)) + + dyz * ( ey(i ,j-1,k-1) + -ey(i ,j ,k-1))), + rhsz(i,j,k ) - (-dxx * ( ez(i-1,j ,k ) + + ez(i+1,j ,k )) + - dyy * ( ez(i ,j-1,k ) + + ez(i ,j+1,k )) + + dxz * (-ex(i-1,j ,k+1) + +ex(i ,j ,k+1)) + + dyz * (-ey(i ,j-1,k+1) + +ey(i ,j ,k+1)))}; + + if (sinfo.xlo_is_symmetric(i)) { + b[0] = -b[1]; + } else if (sinfo.xhi_is_symmetric(i)) { + b[1] = -b[0]; + } + + if (sinfo.ylo_is_symmetric(j)) { + b[2] = -b[3]; + } else if (sinfo.yhi_is_symmetric(j)) { + b[3] = -b[2]; + } + + if (sinfo.zlo_is_symmetric(k)) { + b[4] = -b[5]; + } else if (sinfo.zhi_is_symmetric(k)) { + b[5] = -b[4]; + } + + GpuArray x; + lusolver(x.data(), b.data()); + ex(i-1,j ,k ) = x[0]; + ex(i ,j ,k ) = x[1]; + ey(i ,j-1,k ) = x[2]; + ey(i ,j ,k ) = x[3]; + ez(i ,j ,k-1) = x[4]; + ez(i ,j ,k ) = x[5]; #endif - Real res = rhs(i,j,k) - (gamma*ez(i,j,k) + alpha*ccez); - ez(i,j,k) += omega/gamma * res; } AMREX_GPU_DEVICE AMREX_FORCE_INLINE @@ -338,12 +655,12 @@ AMREX_GPU_DEVICE AMREX_FORCE_INLINE void mlcurlcurl_restriction (int dir, int i, int j, int k, Array4 const& crse, Array4 const& fine, - Array4 const& dmsk) + CurlCurlDirichletInfo const& dinfo) { int ii = i*2; int jj = j*2; int kk = k*2; - if (dmsk(ii,jj,kk)) { + if (dinfo.is_dirichlet_edge(dir,ii,jj,kk)) { crse(i,j,k) = Real(0.0); } else diff --git a/Tests/LinearSolvers/CurlCurl/MyTest.H b/Tests/LinearSolvers/CurlCurl/MyTest.H index 0a4a8d723e..aac7f506c6 100644 --- a/Tests/LinearSolvers/CurlCurl/MyTest.H +++ b/Tests/LinearSolvers/CurlCurl/MyTest.H @@ -38,7 +38,7 @@ private: amrex::Array exact; amrex::Array rhs; - amrex::Real alpha_over_dx2 = 1.0; + amrex::Real alpha_over_dx2 = 100.0; amrex::Real alpha; amrex::Real beta = 1.0; }; diff --git a/Tests/LinearSolvers/CurlCurl/MyTest.cpp b/Tests/LinearSolvers/CurlCurl/MyTest.cpp index fa72fcbca2..6a5bc5a125 100644 --- a/Tests/LinearSolvers/CurlCurl/MyTest.cpp +++ b/Tests/LinearSolvers/CurlCurl/MyTest.cpp @@ -39,6 +39,9 @@ MyTest::solve () for (auto& mf : solution) { mf.setVal(Real(0)); } + for (auto& mf : rhs) { + mf.OverrideSync(geom.periodicity()); + } mlmg.solve({&solution}, {&rhs}, Real(1.0e-10), Real(0)); amrex::Print() << " Number of cells: " << n_cell << std::endl; diff --git a/Tests/LinearSolvers/CurlCurl/inputs b/Tests/LinearSolvers/CurlCurl/inputs index 375562867a..2ba1eb4ff2 100644 --- a/Tests/LinearSolvers/CurlCurl/inputs +++ b/Tests/LinearSolvers/CurlCurl/inputs @@ -5,7 +5,7 @@ max_grid_size = 64 verbose = 2 bottom_verbose = 2 -alpha_over_dx2 = 1.0 +alpha_over_dx2 = 100.0 beta = 1.0 amrex.fpe_trap_invalid=1 From c440e4e51fe7c87b12fb5cac0bdf43af43260c34 Mon Sep 17 00:00:00 2001 From: Ann Almgren Date: Sat, 2 Mar 2024 12:17:45 -0800 Subject: [PATCH 2/4] Ref ratio 3 (#3781) When using ref_ratio 3, imposing max_grid_size could make the fine grids not divisible by 3. This commit fixes that using a blocking-factor-type approach to the grid creation algorithm. --- Src/AmrCore/AMReX_AmrMesh.cpp | 46 ++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/Src/AmrCore/AMReX_AmrMesh.cpp b/Src/AmrCore/AMReX_AmrMesh.cpp index c0e191d79e..e300bb98ce 100644 --- a/Src/AmrCore/AMReX_AmrMesh.cpp +++ b/Src/AmrCore/AMReX_AmrMesh.cpp @@ -763,13 +763,47 @@ AmrMesh::MakeNewGrids (int lbase, Real time, int& new_finest, Vector& } new_bx.Bcast(); // Broadcast the new BoxList to other processes - // - // Refine up to levf. - // - new_bx.refine(ref_ratio[levc]); - BL_ASSERT(new_bx.isDisjoint()); + bool odd_ref_ratio = false; + for (auto const& rr : ref_ratio[levc]) { + if (rr != 1 && (rr%2 != 0)) { + odd_ref_ratio = true; + } + } + + if (odd_ref_ratio) + { + // This approach imposes max_grid_size (suitably scaled) before + // refining so as to ensure fine grids align with coarse grids + + // + // Impose max_grid_size (suitably coarsened) + // + AMREX_ASSERT(max_grid_size[levf].allGE(ref_ratio[levc])); + new_grids[levf] = BoxArray(std::move(new_bx), max_grid_size[levf]/ref_ratio[levc]); + + // + // Refine up to levf. + // + new_grids[levf].refine(ref_ratio[levc]); + } + else + { + // This approach imposes max_grid_size after refining. + // For ref_ratio = 3 this can create fine grids that do not correctly divide by 3, + // but we leave it here so as not to change the gridding in + // existing ref_ratio = 2 or 4 applications - new_grids[levf] = BoxArray(std::move(new_bx), max_grid_size[levf]); + // + // Refine up to levf. + // + new_bx.refine(ref_ratio[levc]); + + // + // Impose max_grid_size + // + new_grids[levf] = BoxArray(std::move(new_bx), max_grid_size[levf]); + } + BL_ASSERT(new_grids[levf].isDisjoint()); } } } From cf712eb6a8aa6f085a7c3e278e9d1ef880b1025e Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sun, 3 Mar 2024 16:55:37 -0800 Subject: [PATCH 3/4] Update GMRES/MLMG interface (#3779) For the curl curl test, if beta = 1e-9 alpha/dx^2, the multigrid solver is able to reduce the residual by 10 orders of magnitude in 10 v-cycles. But for beta = 1e-14 alpha/dx^2, the multigrid solver's residual will stall at about 5e-6 of the original residual. However, it can be solved using GMRES with multigrid as preconditioner. --- Src/Base/AMReX_FabArrayUtility.H | 39 ++++++++++ Src/LinearSolvers/AMReX_GMRES.H | 23 ++++-- Src/LinearSolvers/AMReX_GMRES_MLMG.H | 39 ++++++---- Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.H | 11 ++- Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.cpp | 56 ++++++++++++++ Src/LinearSolvers/MLMG/AMReX_MLMG.H | 82 ++++++++++++++++++--- Tests/LinearSolvers/CurlCurl/GNUmakefile | 2 +- Tests/LinearSolvers/CurlCurl/MyTest.H | 7 +- Tests/LinearSolvers/CurlCurl/MyTest.cpp | 37 ++++++++-- Tests/LinearSolvers/CurlCurl/inputs | 4 +- 10 files changed, 249 insertions(+), 51 deletions(-) diff --git a/Src/Base/AMReX_FabArrayUtility.H b/Src/Base/AMReX_FabArrayUtility.H index 0897c57ed4..311e1fab43 100644 --- a/Src/Base/AMReX_FabArrayUtility.H +++ b/Src/Base/AMReX_FabArrayUtility.H @@ -1616,6 +1616,13 @@ void setBndry (MF& dst, typename MF::value_type val, int scomp, int ncomp) dst.setBndry(val, scomp, ncomp); } +//! dst *= val +template ,int> = 0> +void Scale (MF& dst, typename MF::value_type val, int scomp, int ncomp, int nghost) +{ + dst.mult(val, scomp, ncomp, nghost); +} + //! dst = src template && @@ -1650,6 +1657,16 @@ void Xpay (MF& dst, typename MF::value_type a, MF const& src, int scomp, int dco MF::Xpay(dst, a, src, scomp, dcomp, ncomp, nghost); } +//! dst = a*src_a + b*src_b +template ,int> = 0> +void LinComb (MF& dst, + typename MF::value_type a, MF const& src_a, int acomp, + typename MF::value_type b, MF const& src_b, int bcomp, + int dcomp, int ncomp, IntVect const& nghost) +{ + MF::LinComb(dst, a, src_a, acomp, b, src_b, bcomp, dcomp, ncomp, nghost); +} + //! dst = src w/ MPI communication template , int> = 0> void ParallelCopy (MF& dst, MF const& src, int scomp, int dcomp, int ncomp, @@ -1686,6 +1703,16 @@ void setBndry (Array& dst, typename MF::value_type val, int scomp, int nco } } +//! dst *= val +template ,int> = 0> +void Scale (Array& dst, typename MF::value_type val, int scomp, int ncomp, + int nghost) +{ + for (auto& mf : dst) { + mf.mult(val, scomp, ncomp, nghost); + } +} + //! dst = src template && @@ -1730,6 +1757,18 @@ void Xpay (Array& dst, typename MF::value_type a, } } +//! dst = a*src_a + b*src_b +template ,int> = 0> +void LinComb (Array& dst, + typename MF::value_type a, Array const& src_a, int acomp, + typename MF::value_type b, Array const& src_b, int bcomp, + int dcomp, int ncomp, IntVect const& nghost) +{ + for (std::size_t i = 0; i < N; ++i) { + MF::LinComb(dst[i], a, src_a[i], acomp, b, src_b[i], bcomp, dcomp, ncomp, nghost); + } +} + //! dst = src w/ MPI communication template , int> = 0> void ParallelCopy (Array& dst, Array const& src, diff --git a/Src/LinearSolvers/AMReX_GMRES.H b/Src/LinearSolvers/AMReX_GMRES.H index fd729c9e45..0972ff8eab 100644 --- a/Src/LinearSolvers/AMReX_GMRES.H +++ b/Src/LinearSolvers/AMReX_GMRES.H @@ -53,15 +53,15 @@ namespace amrex { * - void precond(V& lhs, V const& rhs)\n * applies preconditioner to rhs. If there is no preconditioner, * this function should do lhs = rhs. - * - void setVal(V& v, RT value)\n - * v = value. + * - void setToZero(V& v)\n + * v = 0. */ template class GMRES { public: - using RT = typename V::value_type; // double or float + using RT = typename M::RT; // double or float GMRES (); @@ -87,6 +87,9 @@ public: //! Sets restart length. The default is 30. void setRestartLength (int rl); + //! Sets the number of iterations + void setNumIters (int niters) { m_maxiter = niters; } + //! Gets the number of iterations. [[nodiscard]] int getNumIters () const { return m_its; } @@ -202,9 +205,9 @@ void GMRES::solve (V& a_sol, V const& a_rhs, RT a_tol_rel, RT a_tol_abs, in m_v_tmp_lhs = std::make_unique(m_linop->makeVecLHS()); } if (m_vv.empty()) { - m_vv.resize(m_restrtlen+1); - for (auto& v : m_vv) { - v = m_linop->makeVecRHS(); + m_vv.reserve(m_restrtlen+1); + for (int i = 0; i < 2; ++i) { // to save space, start with just 2 + m_vv.emplace_back(m_linop->makeVecRHS()); } } @@ -216,7 +219,7 @@ void GMRES::solve (V& a_sol, V const& a_rhs, RT a_tol_rel, RT a_tol_abs, in auto rnorm0 = RT(0); m_linop->assign(m_vv[0], a_rhs); - m_linop->setVal(a_sol, RT(0.0)); + m_linop->setToZero(a_sol); m_its = 0; m_status = -1; @@ -269,6 +272,10 @@ void GMRES::cycle (V& a_xx, int& a_status, int& a_itcount, RT& a_rnorm0) if (a_status == 0) { break; } + while (m_vv.size() < it+2) { + m_vv.emplace_back(m_linop->makeVecRHS()); + } + auto const& vv_it = m_vv[it ]; auto & vv_it1 = m_vv[it+1]; @@ -384,7 +391,7 @@ void GMRES::build_solution (V& a_xx, int const it) m_grs[k] = tt / m_hh(k,k); } - m_linop->setVal(*m_v_tmp_rhs, RT(0.0)); + m_linop->setToZero(*m_v_tmp_rhs); for (int ii = 0; ii < it+1; ++ii) { m_linop->increment(*m_v_tmp_rhs, m_vv[ii], m_grs[ii]); } diff --git a/Src/LinearSolvers/AMReX_GMRES_MLMG.H b/Src/LinearSolvers/AMReX_GMRES_MLMG.H index 5106afde37..0f065ad0bb 100644 --- a/Src/LinearSolvers/AMReX_GMRES_MLMG.H +++ b/Src/LinearSolvers/AMReX_GMRES_MLMG.H @@ -13,7 +13,7 @@ class GMRESMLMGT { public: using MF = typename M::MFType; // typically MultiFab - using RT = typename MF::value_type; // double or float + using RT = typename M::RT; // double or float explicit GMRESMLMGT (M& mlmg); @@ -29,8 +29,8 @@ public: RT dotProduct (MF const& mf1, MF const& mf2) const; - //! lhs = value - static void setVal (MF& lhs, RT value); + //! lhs = 0 + static void setToZero (MF& lhs); //! lhs = rhs static void assign (MF& lhs, MF const& rhs); @@ -58,6 +58,8 @@ template GMRESMLMGT::GMRESMLMGT (M& mlmg) : m_mlmg(mlmg), m_linop(mlmg.getLinOp()) { + m_mlmg.setVerbose(0); + m_mlmg.setBottomVerbose(0); m_mlmg.prepareLinOp(); } @@ -71,7 +73,7 @@ template auto GMRESMLMGT::makeVecLHS () const -> MF { auto mf = m_linop.make(0, 0, IntVect(1)); - mf.setBndry(0); + setBndry(mf, RT(0), 0, nComp(mf)); return mf; } @@ -85,7 +87,7 @@ auto GMRESMLMGT::norm2 (MF const& mf) const -> RT template void GMRESMLMGT::scale (MF& mf, RT scale_factor) { - mf.mult(scale_factor, 0, mf.nComp()); + Scale(mf, scale_factor, 0, nComp(mf), 0); } template @@ -95,27 +97,27 @@ auto GMRESMLMGT::dotProduct (MF const& mf1, MF const& mf2) const -> RT } template -void GMRESMLMGT::setVal (MF& lhs, RT value) +void GMRESMLMGT::setToZero (MF& lhs) { - lhs.setVal(value); + setVal(lhs, RT(0.0)); } template void GMRESMLMGT::assign (MF& lhs, MF const& rhs) { - MF::Copy(lhs, rhs, 0, 0, lhs.nComp(), IntVect(0)); + LocalCopy(lhs, rhs, 0, 0, nComp(lhs), IntVect(0)); } template void GMRESMLMGT::increment (MF& lhs, MF const& rhs, RT a) { - MF::Saxpy(lhs, a, rhs, 0, 0, lhs.nComp(), IntVect(0)); + Saxpy(lhs, a, rhs, 0, 0, nComp(lhs), IntVect(0)); } template void GMRESMLMGT::linComb (MF& lhs, RT a, MF const& rhs_a, RT b, MF const& rhs_b) { - MF::LinComb(lhs, a, rhs_a, 0, b, rhs_b, 0, 0, lhs.nComp(), IntVect(0)); + LinComb(lhs, a, rhs_a, 0, b, rhs_b, 0, 0, nComp(lhs), IntVect(0)); } template @@ -130,13 +132,18 @@ template void GMRESMLMGT::precond (MF& lhs, MF const& rhs) const { if (m_use_precond) { - // for now, let's just do some smoothing - lhs.setVal(RT(0.0)); - for (int m = 0; m < 4; ++m) { - m_linop.smooth(0, 0, lhs, rhs, (m==0) ? true : false); - } + AMREX_ALWAYS_ASSERT(m_linop.NAMRLevels() == 1); + + m_mlmg.prepareMGcycle(); + + LocalCopy(m_mlmg.res[0][0], rhs, 0, 0, nComp(rhs), IntVect(0)); + + m_mlmg.mgVcycle(0,0); + + LocalCopy(lhs, m_mlmg.cor[0][0], 0, 0, nComp(rhs), IntVect(0)); + } else { - amrex::Copy(lhs, rhs, 0, 0, lhs.nComp(), IntVect(0)); + LocalCopy(lhs, rhs, 0, 0, nComp(lhs), IntVect(0)); } } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.H b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.H index 3e8f06c58e..060fea8424 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.H @@ -14,15 +14,12 @@ namespace amrex { * scalar, and beta is a non-negative scalar. * * It's the caller's responsibility to make sure rhs has consistent nodal - * data. If needed, one could use FabArray::OverrideSync to synchronize - * nodal data. + * data. If needed, one could call prepareRHS for this. * * The smoother is based on the 4-color Gauss-Seidel smoother of Li * et. al. 2020. "An Efficient Preconditioner for 3-D Finite Difference * Modeling of the Electromagnetic Diffusion Process in the Frequency * Domain", IEEE Transactions on Geoscience and Remote Sensing, 58, 500-509. - * - * TODO: If beta is zero, the system could be singular. */ class MLCurlCurl : public MLLinOpT > @@ -48,6 +45,12 @@ public: void setScalars (RT a_alpha, RT a_beta) noexcept; + //! Synchronize RHS on nodal points and set to zero on Dirichlet + //! boundaries. If the user can guarantee these requirements on RHS, + //! this function does not need to be called. If this is called, it + //! should only be called after setDomainBC is called. + void prepareRHS (Vector const& rhs) const; + [[nodiscard]] std::string name () const override { return std::string("curl of curl"); } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.cpp b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.cpp index 6918dfca96..506ed1b905 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.cpp @@ -32,6 +32,62 @@ void MLCurlCurl::setScalars (RT a_alpha, RT a_beta) noexcept { m_alpha = a_alpha; m_beta = a_beta; + AMREX_ASSERT(m_beta > RT(0)); +} + +void MLCurlCurl::prepareRHS (Vector const& rhs) const +{ + MFItInfo mfi_info{}; +#ifdef AMREX_USE_GPU + Vector> tags; + mfi_info.DisableDeviceSync(); +#endif + + for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) { + for (auto& mf : *rhs[amrlev]) { + mf.OverrideSync(m_geom[amrlev][0].periodicity()); + + auto const idxtype = mf.ixType(); + Box const domain = amrex::convert(m_geom[amrlev][0].Domain(), idxtype); + +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(mf,mfi_info); mfi.isValid(); ++mfi) { + auto const& vbx = mfi.validbox(); + auto const& a = mf.array(mfi); + for (OrientationIter oit; oit; ++oit) { + Orientation const face = oit(); + int const idim = face.coordDir(); + bool is_dirichlet = face.isLow() + ? m_lobc[0][idim] == LinOpBCType::Dirichlet + : m_hibc[0][idim] == LinOpBCType::Dirichlet; + if (is_dirichlet && domain[face] == vbx[face] && + idxtype.nodeCentered(idim)) + { + Box b = vbx; + b.setRange(idim, vbx[face], 1); +#ifdef AMREX_USE_GPU + tags.emplace_back(Array4BoxTag{a,b}); +#else + amrex::LoopOnCpu(b, [&] (int i, int j, int k) + { + a(i,j,k) = RT(0.0); + }); +#endif + } + } + } + } + } + +#ifdef AMREX_USE_GPU + ParallelFor(tags, + [=] AMREX_GPU_DEVICE (int i, int j, int k, Array4BoxTag const& tag) noexcept + { + tag.dfab(i,j,k) = RT(0.0); + }); +#endif } void MLCurlCurl::setLevelBC (int amrlev, const MF* levelbcdata, // TODO diff --git a/Src/LinearSolvers/MLMG/AMReX_MLMG.H b/Src/LinearSolvers/MLMG/AMReX_MLMG.H index facc231ae8..60a4f8470c 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLMG.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLMG.H @@ -20,6 +20,7 @@ public: }; template friend class MLCGSolverT; + template friend class GMRESMLMGT; using MFType = MF; using FAB = typename MLLinOpT::FAB; @@ -170,6 +171,8 @@ public: void prepareLinOp (); + void prepareMGcycle (); + void oneIter (int iter); void miniCycle (int amrlev); @@ -866,17 +869,11 @@ MLMGT::apply (const Vector& out, const Vector& a_in) { IntVect ng = ng_sol; if (cf_strategy == CFStrategy::ghostnodes) { ng = IntVect(nghost); } - in_raii[alev].define(boxArray (*a_in[alev]), - DistributionMap(*a_in[alev]), - nComp (*a_in[alev]), ng, - MFInfo(), *linop.Factory(alev)); + in_raii[alev] = linop.make(alev, 0, ng); LocalCopy(in_raii[alev], *a_in[alev], 0, 0, ncomp, IntVect(nghost)); in[alev] = &(in_raii[alev]); } - rh[alev].define(boxArray (*a_in[alev]), - DistributionMap(*a_in[alev]), - nComp (*a_in[alev]), nghost, MFInfo(), - *linop.Factory(alev)); + rh[alev] = linop.make(alev, 0, IntVect(nghost)); setVal(rh[alev], RT(0.0)); } @@ -895,11 +892,15 @@ MLMGT::apply (const Vector& out, const Vector& a_in) linop.reflux(alev, *out[alev], *in[alev], rh[alev], *out[alev+1], *in[alev+1], rh[alev+1]); if (linop.isCellCentered()) { + if constexpr (IsMultiFabLike_v) { #ifdef AMREX_USE_EB - EB_average_down(*out[alev+1], *out[alev], 0, nComp(*out[alev]), amrrr[alev]); + EB_average_down(*out[alev+1], *out[alev], 0, nComp(*out[alev]), amrrr[alev]); #else - average_down(*out[alev+1], *out[alev], 0, nComp(*out[alev]), amrrr[alev]); + average_down(*out[alev+1], *out[alev], 0, nComp(*out[alev]), amrrr[alev]); #endif + } else { + amrex::Abort("MLMG: TODO average_down for non-MultiFab"); + } } } } @@ -912,7 +913,7 @@ MLMGT::apply (const Vector& out, const Vector& a_in) for (int alev = 0; alev <= finest_amr_lev; ++alev) { if (cf_strategy == CFStrategy::ghostnodes) { nghost = linop.getNGrow(alev); } - out[alev]->negate(nghost); + Scale(*out[alev], RT(-1), 0, nComp(*out[alev]), nghost); } } @@ -1113,6 +1114,61 @@ MLMGT::prepareLinOp () } } +template +void +MLMGT::prepareMGcycle () +{ + if (res.empty()) { + IntVect ng = linop.getNGrowVectRestriction(); + linop.make(res, ng); + linop.make(rescor, ng); + + for (int alev = 0; alev <= finest_amr_lev; ++alev) + { + const int nmglevs = linop.NMGLevels(alev); + for (int mglev = 0; mglev < nmglevs; ++mglev) + { + setVal(res [alev][mglev], RT(0.0)); + setVal(rescor[alev][mglev], RT(0.0)); + } + } + + IntVect ng_sol(1); + if (linop.hasHiddenDimension()) { ng_sol[linop.hiddenDirection()] = 0; } + ng = ng_sol; + + cor.resize(namrlevs); + for (int alev = 0; alev <= finest_amr_lev; ++alev) + { + const int nmglevs = linop.NMGLevels(alev); + cor[alev].resize(nmglevs); + for (int mglev = 0; mglev < nmglevs; ++mglev) + { + cor[alev][mglev] = linop.make(alev, mglev, ng); + setVal(cor[alev][mglev], RT(0.0)); + } + } + + cor_hold.resize(std::max(namrlevs-1,1)); + { + const int alev = 0; + const int nmglevs = linop.NMGLevels(alev); + cor_hold[alev].resize(nmglevs); + for (int mglev = 0; mglev < nmglevs-1; ++mglev) + { + cor_hold[alev][mglev] = linop.make(alev, mglev, ng); + setVal(cor_hold[alev][mglev], RT(0.0)); + } + } + for (int alev = 1; alev < finest_amr_lev; ++alev) + { + cor_hold[alev].resize(1); + cor_hold[alev][0] = linop.make(alev, 0, ng); + setVal(cor_hold[alev][0], RT(0.0)); + } + } +} + template void MLMGT::prepareForNSolve () @@ -1524,7 +1580,9 @@ MLMGT::actualBottomSolve () ParallelContext::pop(); - timer[bottom_time] += amrex::second() - bottom_start_time; + if (! timer.empty()) { + timer[bottom_time] += amrex::second() - bottom_start_time; + } } template diff --git a/Tests/LinearSolvers/CurlCurl/GNUmakefile b/Tests/LinearSolvers/CurlCurl/GNUmakefile index ab4cb8e762..caa6472719 100644 --- a/Tests/LinearSolvers/CurlCurl/GNUmakefile +++ b/Tests/LinearSolvers/CurlCurl/GNUmakefile @@ -17,7 +17,7 @@ include $(AMREX_HOME)/Tools/GNUMake/Make.defs include ./Make.package -Pdirs := Base Boundary LinearSolvers/MLMG +Pdirs := Base Boundary LinearSolvers Ppack += $(foreach dir, $(Pdirs), $(AMREX_HOME)/Src/$(dir)/Make.package) diff --git a/Tests/LinearSolvers/CurlCurl/MyTest.H b/Tests/LinearSolvers/CurlCurl/MyTest.H index aac7f506c6..050d328c61 100644 --- a/Tests/LinearSolvers/CurlCurl/MyTest.H +++ b/Tests/LinearSolvers/CurlCurl/MyTest.H @@ -30,6 +30,9 @@ private: bool consolidation = true; int max_coarsening_level = 30; + bool use_gmres = false; + bool gmres_use_precond = true; + amrex::Geometry geom; amrex::BoxArray grids; amrex::DistributionMapping dmap; @@ -38,8 +41,8 @@ private: amrex::Array exact; amrex::Array rhs; - amrex::Real alpha_over_dx2 = 100.0; - amrex::Real alpha; + amrex::Real beta_factor = 1.e-2; + amrex::Real alpha = 1.0; amrex::Real beta = 1.0; }; diff --git a/Tests/LinearSolvers/CurlCurl/MyTest.cpp b/Tests/LinearSolvers/CurlCurl/MyTest.cpp index 6a5bc5a125..5af68fa628 100644 --- a/Tests/LinearSolvers/CurlCurl/MyTest.cpp +++ b/Tests/LinearSolvers/CurlCurl/MyTest.cpp @@ -2,6 +2,8 @@ #include "MyTest.H" +#include +#include #include #include @@ -31,6 +33,7 @@ MyTest::solve () LinOpBCType::Periodic)}); mlcc.setScalars(alpha, beta); + mlcc.prepareRHS({&rhs}); MLMGT > mlmg(mlcc); mlmg.setMaxIter(max_iter); @@ -39,10 +42,29 @@ MyTest::solve () for (auto& mf : solution) { mf.setVal(Real(0)); } - for (auto& mf : rhs) { - mf.OverrideSync(geom.periodicity()); + + auto tol_rel = Real(1.0e-10); + auto tol_abs = Real(0.0); + + if (use_gmres) + { + // This system has homogeneous BC unlike + // Tests/LinearSolvers/ABecLaplacian_C, so the setup is simpler. + + using V = Array; + using M = GMRESMLMGT>; + M mat(mlmg); + mat.usePrecond(gmres_use_precond); + + GMRES gmres; + gmres.setVerbose(verbose); + gmres.define(mat); + gmres.solve(solution, rhs, tol_rel, tol_abs); + } + else + { + mlmg.solve({&solution}, {&rhs}, tol_rel, tol_abs); } - mlmg.solve({&solution}, {&rhs}, Real(1.0e-10), Real(0)); amrex::Print() << " Number of cells: " << n_cell << std::endl; auto dvol = AMREX_D_TERM(geom.CellSize(0), *geom.CellSize(1), *geom.CellSize(2)); @@ -73,8 +95,11 @@ MyTest::readParameters () pp.query("consolidation", consolidation); pp.query("max_coarsening_level", max_coarsening_level); - pp.query("alpha_over_dx2", alpha_over_dx2); - pp.query("beta", beta); + pp.query("use_gmres", use_gmres); + pp.query("gmres_use_precond", gmres_use_precond); + + pp.query("beta_factor", beta_factor); + pp.query("alpha", alpha); } void @@ -87,7 +112,7 @@ MyTest::initData () geom.define(domain); const Real dx = geom.CellSize(0); - alpha = alpha_over_dx2 * dx*dx; + beta = beta_factor * alpha/(dx*dx); grids.define(domain); grids.maxSize(max_grid_size); diff --git a/Tests/LinearSolvers/CurlCurl/inputs b/Tests/LinearSolvers/CurlCurl/inputs index 2ba1eb4ff2..15eff3d270 100644 --- a/Tests/LinearSolvers/CurlCurl/inputs +++ b/Tests/LinearSolvers/CurlCurl/inputs @@ -5,8 +5,8 @@ max_grid_size = 64 verbose = 2 bottom_verbose = 2 -alpha_over_dx2 = 100.0 -beta = 1.0 +alpha = 1.0 +beta_factor = 0.01 amrex.fpe_trap_invalid=1 amrex.fpe_trap_zero=1 From 3525b4a3f27eb64f746dd69b6613f71bb02d6e63 Mon Sep 17 00:00:00 2001 From: Ann Almgren Date: Sun, 3 Mar 2024 17:13:24 -0800 Subject: [PATCH 4/4] fix for ref_ratio=1 (#3786) The current routines compute the slope even if ref_ratio = 1. In this PR we test on ref_ratio before trying to compute slopes and if ref_ratio is equal to 1, we bypass the slope computation. --------- Co-authored-by: Weiqun Zhang --- Src/AmrCore/AMReX_InterpFaceReg_3D_C.H | 39 ++++++++++++-------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/Src/AmrCore/AMReX_InterpFaceReg_3D_C.H b/Src/AmrCore/AMReX_InterpFaceReg_3D_C.H index c55cda3c35..2df7fef055 100644 --- a/Src/AmrCore/AMReX_InterpFaceReg_3D_C.H +++ b/Src/AmrCore/AMReX_InterpFaceReg_3D_C.H @@ -12,11 +12,10 @@ void interp_face_reg (int i, int j, int k, IntVect const& rr, Array4 const int jc = amrex::coarsen(j,rr[1]); int kc = amrex::coarsen(k,rr[2]); if (idim == 0) { - if (jc == domface.smallEnd(1) || jc == domface.bigEnd(1)) { - for (int n = 0; n < ncomp; ++n) { - fine(i,j,k,n+scomp) = crse(ic,jc,kc,n); - } - } else { + for (int n = 0; n < ncomp; ++n) { + fine(i,j,k,n+scomp) = crse(ic,jc,kc,n); + } + if (jc != domface.smallEnd(1) && jc != domface.bigEnd(1) && rr[1] > 1) { Real sfy = Real(1.0); for (int n = 0; n < ncomp; ++n) { Real dc = Real(0.5) * (crse(ic,jc+1,kc,n) - crse(ic,jc-1,kc,n)); @@ -32,11 +31,11 @@ void interp_face_reg (int i, int j, int k, IntVect const& rr, Array4 const } Real yoff = (static_cast(j - jc*rr[1]) + Real(0.5)) / Real(rr[1]) - Real(0.5); for (int n = 0; n < ncomp; ++n) { - fine(i,j,k,n+scomp) = crse(ic,jc,kc,n) + yoff * slope(i,j,k,n) * sfy; + fine(i,j,k,n+scomp) += yoff * slope(i,j,k,n) * sfy; } } - if (kc != domface.smallEnd(2) && kc != domface.bigEnd(2)) { + if (kc != domface.smallEnd(2) && kc != domface.bigEnd(2) && rr[2] > 1) { Real sfz = Real(1.0); for (int n = 0; n < ncomp; ++n) { Real dc = Real(0.5) * (crse(ic,jc,kc+1,n) - crse(ic,jc,kc-1,n)); @@ -56,11 +55,10 @@ void interp_face_reg (int i, int j, int k, IntVect const& rr, Array4 const } } } else if (idim == 1) { - if (ic == domface.smallEnd(0) || ic == domface.bigEnd(0)) { - for (int n = 0; n < ncomp; ++n) { - fine(i,j,k,n+scomp) = crse(ic,jc,kc,n); - } - } else { + for (int n = 0; n < ncomp; ++n) { + fine(i,j,k,n+scomp) = crse(ic,jc,kc,n); + } + if (ic != domface.smallEnd(0) && ic != domface.bigEnd(0) && rr[0] > 1) { Real sfx = Real(1.0); for (int n = 0; n < ncomp; ++n) { Real dc = Real(0.5) * (crse(ic+1,jc,kc,n) - crse(ic-1,jc,kc,n)); @@ -76,11 +74,11 @@ void interp_face_reg (int i, int j, int k, IntVect const& rr, Array4 const } Real xoff = (static_cast(i - ic*rr[0]) + Real(0.5)) / Real(rr[0]) - Real(0.5); for (int n = 0; n < ncomp; ++n) { - fine(i,j,k,n+scomp) = crse(ic,jc,kc,n) + xoff * slope(i,j,k,n) * sfx; + fine(i,j,k,n+scomp) += xoff * slope(i,j,k,n) * sfx; } } - if (kc != domface.smallEnd(2) && kc != domface.bigEnd(2)) { + if (kc != domface.smallEnd(2) && kc != domface.bigEnd(2) && rr[2] > 1) { Real sfz = Real(1.0); for (int n = 0; n < ncomp; ++n) { Real dc = Real(0.5) * (crse(ic,jc,kc+1,n) - crse(ic,jc,kc-1,n)); @@ -100,11 +98,10 @@ void interp_face_reg (int i, int j, int k, IntVect const& rr, Array4 const } } } else { - if (ic == domface.smallEnd(0) || ic == domface.bigEnd(0)) { - for (int n = 0; n < ncomp; ++n) { - fine(i,j,k,n+scomp) = crse(ic,jc,kc,n); - } - } else { + for (int n = 0; n < ncomp; ++n) { + fine(i,j,k,n+scomp) = crse(ic,jc,kc,n); + } + if (ic != domface.smallEnd(0) && ic != domface.bigEnd(0) && rr[0] > 1) { Real sfx = Real(1.0); for (int n = 0; n < ncomp; ++n) { Real dc = Real(0.5) * (crse(ic+1,jc,kc,n) - crse(ic-1,jc,kc,n)); @@ -120,11 +117,11 @@ void interp_face_reg (int i, int j, int k, IntVect const& rr, Array4 const } Real xoff = (static_cast(i - ic*rr[0]) + Real(0.5)) / Real(rr[0]) - Real(0.5); for (int n = 0; n < ncomp; ++n) { - fine(i,j,k,n+scomp) = crse(ic,jc,kc,n) + xoff * slope(i,j,k,n) * sfx; + fine(i,j,k,n+scomp) += xoff * slope(i,j,k,n) * sfx; } } - if (jc != domface.smallEnd(1) && jc != domface.bigEnd(1)) { + if (jc != domface.smallEnd(1) && jc != domface.bigEnd(1) && rr[1] > 1) { Real sfy = Real(1.0); for (int n = 0; n < ncomp; ++n) { Real dc = Real(0.5) * (crse(ic,jc+1,kc,n) - crse(ic,jc-1,kc,n));