diff --git a/.github/workflows/apps.yml b/.github/workflows/apps.yml
index e909193601b..2ff10f92462 100644
--- a/.github/workflows/apps.yml
+++ b/.github/workflows/apps.yml
@@ -52,7 +52,9 @@ jobs:
         export AMREX_HOME=${PWD}
         export MICROPHYSICS_HOME=${PWD}/Microphysics
         cd Castro/Exec/hydro_tests/Sedov/
-        make -j4 CCACHE=ccache USE_MPI=FALSE
+        make -j4 CCACHE=ccache USE_MPI=FALSE \
+            USE_LINEAR_SOLVERS_INCFLO=FALSE \
+            USE_LINEAR_SOLVERS_EM=FALSE
 
         ccache -s
         du -hs ~/.cache/ccache
@@ -92,7 +94,8 @@ jobs:
             -DWarpX_QED=OFF                       \
             -DWarpX_OPENPMD=OFF                   \
             -DCMAKE_VERBOSE_MAKEFILE=ON           \
-            -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
+            -DCMAKE_CXX_COMPILER_LAUNCHER=ccache  \
+            -DAMReX_LINEAR_SOLVER_INCFLO=OFF
         cmake --build WarpX/build -j 4
 
         ccache -s
diff --git a/Docs/sphinx_documentation/source/BuildingAMReX.rst b/Docs/sphinx_documentation/source/BuildingAMReX.rst
index 90fb4d6eb30..064d11e740a 100644
--- a/Docs/sphinx_documentation/source/BuildingAMReX.rst
+++ b/Docs/sphinx_documentation/source/BuildingAMReX.rst
@@ -463,6 +463,12 @@ The list of available options is reported in the :ref:`table <tab:cmakevar>` bel
    +------------------------------+-------------------------------------------------+-------------------------+-----------------------+
    | AMReX_LINEAR_SOLVERS         |  Build AMReX linear solvers                     | YES                     | YES, NO               |
    +------------------------------+-------------------------------------------------+-------------------------+-----------------------+
+   | AMReX_LINEAR_SOLVERS_INCFLO  |  Build AMReX linear solvers for incompressible  | YES                     | YES, NO               |
+   |                              |  flow                                           |                         |
+   +------------------------------+-------------------------------------------------+-------------------------+-----------------------+
+   | AMReX_LINEAR_SOLVERS_EM      |  Build AMReX linear solvers for electromagnetic | YES                     | YES, NO               |
+   |                              |  solvers                                        |                         |
+   +------------------------------+-------------------------------------------------+-------------------------+-----------------------+
    | AMReX_AMRDATA                |  Build data services                            | NO                      | YES, NO               |
    +------------------------------+-------------------------------------------------+-------------------------+-----------------------+
    | AMReX_AMRLEVEL               |  Build AmrLevel class                           | YES                     | YES, NO               |
@@ -681,6 +687,10 @@ A list of AMReX component names and related configure options are shown in the t
    +------------------------------+-----------------+
    | AMReX_LINEAR_SOLVERS         | LSOLVERS        |
    +------------------------------+-----------------+
+   | AMReX_LINEAR_SOLVERS_INCFLO  | LSOLVERS_INCFLO |
+   +------------------------------+-----------------+
+   | AMReX_LINEAR_SOLVERS_EM      | LSOLVERS_EM     |
+   +------------------------------+-----------------+
    | AMReX_AMRDATA                | AMRDATA         |
    +------------------------------+-----------------+
    | AMReX_AMRLEVEL               | AMRLEVEL        |
diff --git a/Src/Extern/HYPRE/AMReX_Habec_2D_K.H b/Src/Extern/HYPRE/AMReX_Habec_2D_K.H
index 731ad04d4bd..2d1d63432a0 100644
--- a/Src/Extern/HYPRE/AMReX_Habec_2D_K.H
+++ b/Src/Extern/HYPRE/AMReX_Habec_2D_K.H
@@ -6,7 +6,7 @@
 #include <AMReX_EBMultiFabUtil.H>
 #include <AMReX_MultiCutFab.H>
 #include <AMReX_EBFabFactory.H>
-#include <AMReX_MLEBABecLap_K.H>
+#include <AMReX_MLLinOp_K.H>
 #endif
 
 namespace amrex {
diff --git a/Src/Extern/HYPRE/AMReX_Habec_3D_K.H b/Src/Extern/HYPRE/AMReX_Habec_3D_K.H
index 5d5c054758e..6b4e67587d7 100644
--- a/Src/Extern/HYPRE/AMReX_Habec_3D_K.H
+++ b/Src/Extern/HYPRE/AMReX_Habec_3D_K.H
@@ -6,7 +6,7 @@
 #include <AMReX_EBMultiFabUtil.H>
 #include <AMReX_MultiCutFab.H>
 #include <AMReX_EBFabFactory.H>
-#include <AMReX_MLEBABecLap_K.H>
+#include <AMReX_MLLinOp_K.H>
 #endif
 
 namespace amrex {
diff --git a/Src/LinearSolvers/CMakeLists.txt b/Src/LinearSolvers/CMakeLists.txt
index 6287ef4b422..3368210480a 100644
--- a/Src/LinearSolvers/CMakeLists.txt
+++ b/Src/LinearSolvers/CMakeLists.txt
@@ -16,6 +16,8 @@ foreach(D IN LISTS AMReX_SPACEDIM)
        MLMG/AMReX_MLLinOp_K.H
        MLMG/AMReX_MLCellLinOp.H
        MLMG/AMReX_MLNodeLinOp.H
+       MLMG/AMReX_MLNodeLinOp_K.H
+       MLMG/AMReX_MLNodeLinOp_${D}D_K.H
        MLMG/AMReX_MLNodeLinOp.cpp
        MLMG/AMReX_MLCellABecLap.H
        MLMG/AMReX_MLCellABecLap_K.H
@@ -31,30 +33,6 @@ foreach(D IN LISTS AMReX_SPACEDIM)
        MLMG/AMReX_MLPoisson.H
        MLMG/AMReX_MLPoisson_K.H
        MLMG/AMReX_MLPoisson_${D}D_K.H
-       MLMG/AMReX_MLNodeLaplacian.H
-       MLMG/AMReX_MLNodeLaplacian.cpp
-       MLMG/AMReX_MLNodeLaplacian_sync.cpp
-       MLMG/AMReX_MLNodeLaplacian_sten.cpp
-       MLMG/AMReX_MLNodeLaplacian_misc.cpp
-       MLMG/AMReX_MLNodeLap_K.H
-       MLMG/AMReX_MLNodeLap_${D}D_K.H
-       MLMG/AMReX_MLNodeTensorLaplacian.H
-       MLMG/AMReX_MLNodeTensorLaplacian.cpp
-       MLMG/AMReX_MLNodeTensorLap_K.H
-       MLMG/AMReX_MLNodeTensorLap_${D}D_K.H
-       MLMG/AMReX_MLTensorOp.H
-       MLMG/AMReX_MLTensorOp.cpp
-       MLMG/AMReX_MLTensorOp_grad.cpp
-       MLMG/AMReX_MLTensor_K.H
-       MLMG/AMReX_MLTensor_${D}D_K.H
-       MLMG/AMReX_MLEBNodeFDLaplacian.H
-       MLMG/AMReX_MLEBNodeFDLaplacian.cpp
-       MLMG/AMReX_MLEBNodeFDLap_K.H
-       MLMG/AMReX_MLEBNodeFDLap_${D}D_K.H
-       MLMG/AMReX_MLNodeABecLaplacian.H
-       MLMG/AMReX_MLNodeABecLaplacian.cpp
-       MLMG/AMReX_MLNodeABecLap_K.H
-       MLMG/AMReX_MLNodeABecLap_${D}D_K.H
        AMReX_GMRES.H
        AMReX_GMRES_MLMG.H
        )
@@ -68,7 +46,7 @@ foreach(D IN LISTS AMReX_SPACEDIM)
           )
     endif ()
 
-    if (NOT D EQUAL 1)
+    if (NOT D EQUAL 1 AND AMReX_LINEAR_SOLVERS_EM)
        target_sources(amrex_${D}d
           PRIVATE
           MLMG/AMReX_MLCurlCurl.H
@@ -77,7 +55,7 @@ foreach(D IN LISTS AMReX_SPACEDIM)
           )
     endif ()
 
-    if (AMReX_EB AND NOT D EQUAL 1)
+    if (AMReX_EB AND NOT D EQUAL 1 AND AMReX_LINEAR_SOLVERS_INCFLO)
        target_sources(amrex_${D}d
           PRIVATE
           MLMG/AMReX_MLNodeLaplacian_eb.cpp
@@ -94,6 +72,42 @@ foreach(D IN LISTS AMReX_SPACEDIM)
           )
     endif ()
 
+    if (AMReX_LINEAR_SOLVERS_EM)
+       target_sources(amrex_${D}d
+          PRIVATE
+          MLMG/AMReX_MLEBNodeFDLaplacian.H
+          MLMG/AMReX_MLEBNodeFDLaplacian.cpp
+          MLMG/AMReX_MLEBNodeFDLap_K.H
+          MLMG/AMReX_MLEBNodeFDLap_${D}D_K.H
+          MLMG/AMReX_MLNodeTensorLaplacian.H
+          MLMG/AMReX_MLNodeTensorLaplacian.cpp
+          MLMG/AMReX_MLNodeTensorLap_K.H
+          MLMG/AMReX_MLNodeTensorLap_${D}D_K.H
+       )
+    endif ()
+
+    if (AMReX_LINEAR_SOLVERS_INCFLO)
+       target_sources(amrex_${D}d
+          PRIVATE
+          MLMG/AMReX_MLNodeABecLaplacian.H
+          MLMG/AMReX_MLNodeABecLaplacian.cpp
+          MLMG/AMReX_MLNodeABecLap_K.H
+          MLMG/AMReX_MLNodeABecLap_${D}D_K.H
+          MLMG/AMReX_MLNodeLaplacian.H
+          MLMG/AMReX_MLNodeLaplacian.cpp
+          MLMG/AMReX_MLNodeLaplacian_sync.cpp
+          MLMG/AMReX_MLNodeLaplacian_sten.cpp
+          MLMG/AMReX_MLNodeLaplacian_misc.cpp
+          MLMG/AMReX_MLNodeLap_K.H
+          MLMG/AMReX_MLNodeLap_${D}D_K.H
+          MLMG/AMReX_MLTensorOp.H
+          MLMG/AMReX_MLTensorOp.cpp
+          MLMG/AMReX_MLTensorOp_grad.cpp
+          MLMG/AMReX_MLTensor_K.H
+          MLMG/AMReX_MLTensor_${D}D_K.H
+       )
+    endif ()
+
     if (AMReX_FORTRAN)
        target_sources(amrex_${D}d
           PRIVATE
@@ -102,7 +116,7 @@ foreach(D IN LISTS AMReX_SPACEDIM)
           )
     endif ()
 
-    if (AMReX_HYPRE)
+    if (AMReX_HYPRE AND AMReX_LINEAR_SOLVERS_INCFLO)
        target_sources(amrex_${D}d
           PRIVATE
           MLMG/AMReX_MLNodeLaplacian_hypre.cpp
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_K.H
index 517b1875bc3..434789c5c5f 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_K.H
+++ b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_K.H
@@ -6,13 +6,6 @@
 
 #include <AMReX_EBCellFlag.H>
 
-namespace amrex {
-    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-    Real get_dx_eb (Real kappa) noexcept {
-        return amrex::max(Real(0.3),(kappa*kappa-Real(0.25))/(Real(2.0)*kappa));
-    }
-}
-
 #if (AMREX_SPACEDIM == 2)
 #include <AMReX_MLEBABecLap_2D_K.H>
 #else
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp
index af4a6a6d742..0a0bdf39a10 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp
+++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp
@@ -1,6 +1,6 @@
 #include <AMReX_MLEBNodeFDLaplacian.H>
 #include <AMReX_MLEBNodeFDLap_K.H>
-#include <AMReX_MLNodeLap_K.H>
+#include <AMReX_MLNodeLinOp_K.H>
 #include <AMReX_MLNodeTensorLap_K.H>
 #include <AMReX_MultiFabUtil.H>
 
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLLinOp_K.H b/Src/LinearSolvers/MLMG/AMReX_MLLinOp_K.H
index a6bc6517366..74fcc1302f7 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLLinOp_K.H
+++ b/Src/LinearSolvers/MLMG/AMReX_MLLinOp_K.H
@@ -1053,6 +1053,15 @@ void mllinop_apply_innu_zhi (int i, int j, int k,
     }
 }
 
+#ifdef AMREX_USE_EB
+
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+Real get_dx_eb (Real kappa) noexcept {
+    return amrex::max(Real(0.3),(kappa*kappa-Real(0.25))/(Real(2.0)*kappa));
+}
+
+#endif
+
 }
 
 #endif
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_1D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_1D_K.H
index 91d02257396..7b6bc1e1fc3 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_1D_K.H
+++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_1D_K.H
@@ -4,79 +4,6 @@
 
 namespace amrex {
 
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void mlndlap_set_nodal_mask (int i, int, int, Array4<int> const& nmsk,
-                             Array4<int const> const& cmsk) noexcept
-{
-    using namespace nodelap_detail;
-
-    int s = cmsk(i-1,0,0) + cmsk(i,0,0);
-    if (s == 2*crse_cell) {
-        nmsk(i,0,0) = crse_node;
-    } else if (s == 2*fine_cell) {
-        nmsk(i,0,0) = fine_node;
-    } else {
-        nmsk(i,0,0) = crse_fine_node;
-    }
-}
-
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void mlndlap_set_dirichlet_mask (Box const& bx, Array4<int> const& dmsk,
-                                 Array4<int const> const& omsk, Box const& dom,
-                                 GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bclo,
-                                 GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bchi) noexcept
-{
-    const auto lo = bx.smallEnd(0);
-    const auto hi = bx.bigEnd(0);
-    AMREX_PRAGMA_SIMD
-    for (int i = lo; i <= hi; ++i) {
-        if (!dmsk(i,0,0)) {
-            dmsk(i,0,0) = (omsk(i-1,0,0) == 1 || omsk(i,0,0) == 1);
-        }
-    }
-
-    const auto domlo = dom.smallEnd(0);
-    const auto domhi = dom.bigEnd(0);
-
-    if (bclo[0] == LinOpBCType::Dirichlet && lo == domlo) {
-        dmsk(lo,0,0) = 1;
-    }
-
-    if (bchi[0] == LinOpBCType::Dirichlet && hi == domhi) {
-        dmsk(hi,0,0) = 1;
-    }
-}
-
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void mlndlap_set_dot_mask (Box const& bx, Array4<Real> const& dmsk,
-                           Array4<int const> const& omsk, Box const& dom,
-                           GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bclo,
-                           GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bchi) noexcept
-{
-    const auto lo = bx.smallEnd(0);
-    const auto hi = bx.bigEnd(0);
-
-    AMREX_PRAGMA_SIMD
-    for (int i = lo; i <= hi; ++i) {
-        dmsk(i,0,0) = static_cast<Real>(omsk(i,0,0));
-    }
-
-    const auto domlo = dom.smallEnd(0);
-    const auto domhi = dom.bigEnd(0);
-
-    if ((bclo[0] == LinOpBCType::Neumann || bclo[0] == LinOpBCType::inflow)
-        && lo == domlo)
-    {
-        dmsk(lo,0,0) *= Real(0.5);
-    }
-
-    if ((bchi[0] == LinOpBCType::Neumann || bchi[0] == LinOpBCType::inflow)
-        && hi == domhi)
-    {
-        dmsk(hi,0,0) *= Real(0.5);
-    }
-}
-
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
 void mlndlap_zero_fine (int i, int, int, Array4<Real> const& phi,
                         Array4<int const> const& msk, int fine_flag) noexcept
@@ -106,39 +33,6 @@ void mlndlap_semi_avgdown_coeff (int i, int j, int k, Array4<Real> const& crse,
     mlndlap_avgdown_coeff_x(i,j,k,crse,fine);
 }
 
-template <typename T>
-void mlndlap_bc_doit (Box const& vbx, Array4<T> const& a, Box const& domain,
-                      GpuArray<bool,AMREX_SPACEDIM> const& bflo,
-                      GpuArray<bool,AMREX_SPACEDIM> const& bfhi) noexcept
-{
-    Box gdomain = domain;
-    int const idim = 0;
-    if (! bflo[idim]) { gdomain.growLo(idim,1); }
-    if (! bfhi[idim]) { gdomain.growHi(idim,1); }
-
-    if (gdomain.strictly_contains(vbx)) { return; }
-
-    const int offset = domain.cellCentered() ? 0 : 1;
-
-    const auto dlo = domain.smallEnd(0);
-    const auto dhi = domain.bigEnd(0);
-
-    Box const& sbox = amrex::grow(vbx,1);
-    AMREX_HOST_DEVICE_FOR_3D(sbox, i, j, k,
-    {
-        if (! gdomain.contains(IntVect(i))) {
-            if (i == dlo-1 && bflo[0])
-            {
-                a(i,0,0) = a(i+1+offset, j, k);
-            }
-            else if (i == dhi+1 && bfhi[0])
-            {
-                a(i,0,0) = a(i-1-offset, j, k);
-            }
-        }
-    });
-}
-
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
 Real mlndlap_adotx_c (int i, int, int, Array4<Real const> const& x,
                       Real sigma, Array4<int const> const& msk,
@@ -335,59 +229,6 @@ void mlndlap_gauss_seidel_with_line_solve_aa(Box const&, Array4<Real> const&,
     amrex::Abort("mlndlap_gauss_seidel_with_line_solve_aa: not implemented in 1D");
 }
 
-
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void mlndlap_restriction (int i, int, int, Array4<Real> const& crse,
-                          Array4<Real const> const& fine, Array4<int const> const& msk) noexcept
-{
-    int ii = i*2;
-    if (msk(ii,0,0)) {
-        crse(i,0,0) = Real(0.0);
-    } else {
-        crse(i,0,0) = Real(1./4.)  *(fine(ii-1,0,0)
-                         + Real(2.)* fine(ii  ,0,0)
-                         +           fine(ii+1,0,0));
-    }
-}
-
-template <int rr>
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void mlndlap_restriction (int i, int, int, Array4<Real> const& crse,
-                          Array4<Real const> const& fine, Array4<int const> const& msk,
-                          Box const& fdom,
-                          GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bclo,
-                          GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bchi) noexcept
-
-{
-    const int ii = i*rr;
-    if (msk(ii,0,0)) {
-        crse(i,0,0) = Real(0.0);
-    } else {
-        const auto ndlo = fdom.smallEnd(0);
-        const auto ndhi = fdom.bigEnd(0);
-        Real tmp = Real(0.0);
-        for (int ioff = -rr+1; ioff <= rr-1; ++ioff) {
-            Real wx = rr - std::abs(ioff);
-            int itmp = ii + ioff;
-            if ((itmp < ndlo && (bclo[0] == LinOpBCType::Neumann ||
-                                 bclo[0] == LinOpBCType::inflow)) ||
-                (itmp > ndhi && (bchi[0] == LinOpBCType::Neumann ||
-                                 bchi[0] == LinOpBCType::inflow))) {
-                itmp = ii - ioff;
-            }
-            tmp += wx*fine(itmp,0,0);
-        }
-        crse(i,0,0) = tmp*(Real(1.0)/Real(rr*rr));
-    }
-}
-
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void mlndlap_semi_restriction (int /*i*/, int /*j*/, int /*k*/, Array4<Real> const&,
-                          Array4<Real const> const&, Array4<int const> const&, int) noexcept
-{
-    amrex::Abort("mlndlap_semi_restriction: not implemented in 1D");
-}
-
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
 void mlndlap_interpadd_c (int i, int , int, Array4<Real> const& fine,
                           Array4<Real const> const& crse,
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_2D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_2D_K.H
index 05f02aaa927..db6922c4105 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_2D_K.H
+++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_2D_K.H
@@ -4,127 +4,6 @@
 
 namespace amrex {
 
-//
-// masks
-//
-
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void mlndlap_set_nodal_mask (int i, int j, int k, Array4<int> const& nmsk,
-                             Array4<int const> const& cmsk) noexcept
-{
-    using namespace nodelap_detail;
-
-    int s = cmsk(i-1,j-1,k) + cmsk(i  ,j-1,k)
-        +   cmsk(i-1,j  ,k) + cmsk(i  ,j  ,k);
-    if (s == 4*crse_cell) {
-        nmsk(i,j,k) = crse_node;
-    }
-    else if (s == 4*fine_cell) {
-        nmsk(i,j,k) = fine_node;
-    } else {
-        nmsk(i,j,k) = crse_fine_node;
-    }
-}
-
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void mlndlap_set_dirichlet_mask (Box const& bx, Array4<int> const& dmsk,
-                                 Array4<int const> const& omsk, Box const& dom,
-                                 GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bclo,
-                                 GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bchi) noexcept
-{
-    const auto lo = amrex::lbound(bx);
-    const auto hi = amrex::ubound(bx);
-    for (int j = lo.y; j <= hi.y; ++j) {
-    AMREX_PRAGMA_SIMD
-    for (int i = lo.x; i <= hi.x; ++i) {
-        if (!dmsk(i,j,0)) {
-            dmsk(i,j,0) = (omsk(i-1,j-1,0) == 1 || omsk(i,j-1,0) == 1 ||
-                           omsk(i-1,j  ,0) == 1 || omsk(i,j  ,0) == 1);
-        }
-    }}
-
-    const auto domlo = amrex::lbound(dom);
-    const auto domhi = amrex::ubound(dom);
-
-    if (bclo[0] == LinOpBCType::Dirichlet && lo.x == domlo.x) {
-        for (int j = lo.y; j <= hi.y; ++j) {
-            dmsk(lo.x,j,0) = 1;
-        }
-    }
-
-    if (bchi[0] == LinOpBCType::Dirichlet && hi.x == domhi.x) {
-        for (int j = lo.y; j <= hi.y; ++j) {
-            dmsk(hi.x,j,0) = 1;
-        }
-    }
-
-    if (bclo[1] == LinOpBCType::Dirichlet && lo.y == domlo.y) {
-        AMREX_PRAGMA_SIMD
-        for (int i = lo.x; i <= hi.x; ++i) {
-            dmsk(i,lo.y,0) = 1;
-        }
-    }
-
-    if (bchi[1] == LinOpBCType::Dirichlet && hi.y == domhi.y) {
-        AMREX_PRAGMA_SIMD
-        for (int i = lo.x; i <= hi.x; ++i) {
-            dmsk(i,hi.y,0) = 1;
-        }
-    }
-}
-
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void mlndlap_set_dot_mask (Box const& bx, Array4<Real> const& dmsk,
-                           Array4<int const> const& omsk, Box const& dom,
-                           GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bclo,
-                           GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bchi) noexcept
-{
-    const auto lo = amrex::lbound(bx);
-    const auto hi = amrex::ubound(bx);
-    for (int j = lo.y; j <= hi.y; ++j) {
-    AMREX_PRAGMA_SIMD
-    for (int i = lo.x; i <= hi.x; ++i) {
-        dmsk(i,j,0) = static_cast<Real>(omsk(i,j,0));
-    }}
-
-    const auto domlo = amrex::lbound(dom);
-    const auto domhi = amrex::ubound(dom);
-
-    if ((bclo[0] == LinOpBCType::Neumann || bclo[0] == LinOpBCType::inflow)
-        && lo.x == domlo.x)
-    {
-        for (int j = lo.y; j <= hi.y; ++j) {
-            dmsk(lo.x,j,0) *= Real(0.5);
-        }
-    }
-
-    if ((bchi[0] == LinOpBCType::Neumann || bchi[0] == LinOpBCType::inflow)
-        && hi.x == domhi.x)
-    {
-        for (int j = lo.y; j <= hi.y; ++j) {
-            dmsk(hi.x,j,0) *= Real(0.5);
-        }
-    }
-
-    if ((bclo[1] == LinOpBCType::Neumann || bclo[1] == LinOpBCType::inflow)
-        && lo.y == domlo.y)
-    {
-        AMREX_PRAGMA_SIMD
-        for (int i = lo.x; i <= hi.x; ++i) {
-            dmsk(i,lo.y,0) *= Real(0.5);
-        }
-    }
-
-    if ((bchi[1] == LinOpBCType::Neumann || bchi[1] == LinOpBCType::inflow)
-        && hi.y == domhi.y)
-    {
-        AMREX_PRAGMA_SIMD
-        for (int i = lo.x; i <= hi.x; ++i) {
-            dmsk(i,hi.y,0) *= Real(0.5);
-        }
-    }
-}
-
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
 void mlndlap_zero_fine (int i, int j, int, Array4<Real> const& phi,
                         Array4<int const> const& msk, int fine_flag) noexcept
@@ -177,116 +56,6 @@ void mlndlap_semi_avgdown_coeff (int i, int j, int k, Array4<Real> const& crse,
     }
 }
 
-//
-// bc
-//
-
-template <typename T>
-void mlndlap_bc_doit (Box const& vbx, Array4<T> const& a, Box const& domain,
-                      GpuArray<bool,AMREX_SPACEDIM> const& bflo,
-                      GpuArray<bool,AMREX_SPACEDIM> const& bfhi) noexcept
-{
-    Box gdomain = domain;
-    for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
-        if (! bflo[idim]) { gdomain.growLo(idim,1); }
-        if (! bfhi[idim]) { gdomain.growHi(idim,1); }
-    }
-
-    if (gdomain.strictly_contains(vbx)) { return; }
-
-    const int offset = domain.cellCentered() ? 0 : 1;
-
-    const auto dlo = amrex::lbound(domain);
-    const auto dhi = amrex::ubound(domain);
-
-    Box const& sbox = amrex::grow(vbx,1);
-    AMREX_HOST_DEVICE_FOR_3D(sbox, i, j, k,
-    {
-        if (! gdomain.contains(IntVect(i,j))) {
-            // xlo & ylo
-            if (i == dlo.x-1 && j == dlo.y-1 && (bflo[0] || bflo[1]))
-            {
-                if (bflo[0] && bflo[1])
-                {
-                    a(i,j,k) = a(i+1+offset, j+1+offset, k);
-                }
-                else if (bflo[0])
-                {
-                    a(i,j,k) = a(i+1+offset, j, k);
-                }
-                else if (bflo[1])
-                {
-                    a(i,j,k) = a(i, j+1+offset, k);
-                }
-            }
-            // xhi & ylo
-            else if (i == dhi.x+1 && j == dlo.y-1 && (bfhi[0] || bflo[1]))
-            {
-                if (bfhi[0] && bflo[1])
-                {
-                    a(i,j,k) = a(i-1-offset, j+1+offset, k);
-                }
-                else if (bfhi[0])
-                {
-                    a(i,j,k) = a(i-1-offset, j, k);
-                }
-                else if (bflo[1])
-                {
-                    a(i,j,k) = a(i, j+1+offset, k);
-                }
-            }
-            // xlo & yhi
-            else if (i == dlo.x-1 && j == dhi.y+1 && (bflo[0] || bfhi[1]))
-            {
-                if (bflo[0] && bfhi[1])
-                {
-                    a(i,j,k) = a(i+1+offset, j-1-offset, k);
-                }
-                else if (bflo[0])
-                {
-                    a(i,j,k) = a(i+1+offset, j, k);
-                }
-                else if (bfhi[1])
-                {
-                    a(i,j,k) = a(i, j-1-offset, k);
-                }
-            }
-            // xhi & yhi
-            else if (i == dhi.x+1 && j == dhi.y+1 && (bfhi[0] || bfhi[1]))
-            {
-                if (bfhi[0] && bfhi[1])
-                {
-                    a(i,j,k) = a(i-1-offset, j-1-offset, k);
-                }
-                else if (bfhi[0])
-                {
-                    a(i,j,k) = a(i-1-offset, j, k);
-                }
-                else if (bfhi[1])
-                {
-                    a(i,j,k) = a(i, j-1-offset, k);
-                }
-            }
-            else if (i == dlo.x-1 && bflo[0])
-            {
-                a(i,j,k) = a(i+1+offset, j, k);
-            }
-            else if (i == dhi.x+1 && bfhi[0])
-            {
-                a(i,j,k) = a(i-1-offset, j, k);
-            }
-            else if (j == dlo.y-1 && bflo[1])
-            {
-                a(i,j,k) = a(i, j+1+offset, k);
-            }
-            else if (j == dhi.y+1 && bfhi[1])
-            {
-                a(i,j,k) = a(i, j-1-offset, k);
-            }
-        }
-    });
-}
-
 //
 // operator
 //
@@ -796,91 +565,6 @@ void mlndlap_gauss_seidel_with_line_solve_aa (Box const& bx, Array4<Real> const&
 
 }
 
-//
-// restriction
-//
-
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void mlndlap_restriction (int i, int j, int k, Array4<Real> const& crse,
-                          Array4<Real const> const& fine, Array4<int const> const& msk) noexcept
-{
-    int ii = i*2;
-    int jj = j*2;
-    int kk = 0;
-    if (msk(ii,jj,kk)) {
-        crse(i,j,k) = Real(0.0);
-    } else {
-        crse(i,j,k) = Real(1./16.)*(fine(ii-1,jj-1,kk) + Real(2.)*fine(ii  ,jj-1,kk) +          fine(ii+1,jj-1,kk)
-                         + Real(2.)*fine(ii-1,jj  ,kk) + Real(4.)*fine(ii  ,jj  ,kk) + Real(2.)*fine(ii+1,jj  ,kk)
-                                  + fine(ii-1,jj+1,kk) + Real(2.)*fine(ii  ,jj+1,kk) +          fine(ii+1,jj+1,kk));
-    }
-}
-
-template <int rr>
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void mlndlap_restriction (int i, int j, int k, Array4<Real> const& crse,
-                          Array4<Real const> const& fine, Array4<int const> const& msk,
-                          Box const& fdom,
-                          GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bclo,
-                          GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bchi) noexcept
-{
-    const int ii = i*rr;
-    const int jj = j*rr;
-    if (msk(ii,jj,0)) {
-        crse(i,j,k) = Real(0.0);
-    } else {
-        const auto ndlo = amrex::lbound(fdom);
-        const auto ndhi = amrex::ubound(fdom);
-        Real tmp = Real(0.0);
-        for (int joff = -rr+1; joff <= rr-1; ++joff) {
-            Real wy = rr - std::abs(joff);
-            for (int ioff = -rr+1; ioff <= rr-1; ++ioff) {
-                Real wx = rr - std::abs(ioff);
-                int itmp = ii + ioff;
-                int jtmp = jj + joff;
-                if ((itmp < ndlo.x && (bclo[0] == LinOpBCType::Neumann ||
-                                       bclo[0] == LinOpBCType::inflow)) ||
-                    (itmp > ndhi.x && (bchi[0] == LinOpBCType::Neumann ||
-                                       bchi[0] == LinOpBCType::inflow))) {
-                    itmp = ii - ioff;
-                }
-                if ((jtmp < ndlo.y && (bclo[1] == LinOpBCType::Neumann ||
-                                       bclo[1] == LinOpBCType::inflow)) ||
-                    (jtmp > ndhi.y && (bchi[1] == LinOpBCType::Neumann ||
-                                       bchi[1] == LinOpBCType::inflow))) {
-                    jtmp = jj - joff;
-                }
-                tmp += wx*wy*fine(itmp,jtmp,0);
-            }
-        }
-        crse(i,j,k) = tmp*(Real(1.0)/Real(rr*rr*rr*rr));
-    }
-}
-
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void mlndlap_semi_restriction (int i, int j, int k, Array4<Real> const& crse,
-                          Array4<Real const> const& fine, Array4<int const> const& msk, int idir) noexcept
-{
-    int kk = 0;
-    if (idir == 1) {
-        int ii = i*2;
-        int jj = j;
-        if (msk(ii,jj,kk)) {
-            crse(i,j,k) = Real(0.0);
-        } else {
-            crse(i,j,k) = Real(1./4.)*(fine(ii-1,jj,kk) + Real(2.)*fine(ii,jj,kk) + fine(ii+1,jj,kk));
-        }
-    } else if (idir == 0) {
-        int ii = i;
-        int jj = j*2;
-        if (msk(ii,jj,kk)) {
-            crse(i,j,k) = Real(0.0);
-        } else {
-            crse(i,j,k) = Real(1./4.)*(fine(ii,jj-1,kk) + Real(2.)*fine(ii,jj,kk) + fine(ii,jj+1,kk));
-        }
-    }
-}
-
 //
 // interpolation
 //
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_3D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_3D_K.H
index 5d31de02711..2ddcecfe37d 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_3D_K.H
+++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_3D_K.H
@@ -4,177 +4,6 @@
 
 namespace amrex {
 
-//
-// masks
-//
-
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void mlndlap_set_nodal_mask (int i, int j, int k, Array4<int> const& nmsk,
-                             Array4<int const> const& cmsk) noexcept
-{
-    using namespace nodelap_detail;
-
-    int s = cmsk(i-1,j-1,k-1) + cmsk(i  ,j-1,k-1)
-        +   cmsk(i-1,j  ,k-1) + cmsk(i  ,j  ,k-1)
-        +   cmsk(i-1,j-1,k  ) + cmsk(i  ,j-1,k  )
-        +   cmsk(i-1,j  ,k  ) + cmsk(i  ,j  ,k  );
-    if (s == 8*crse_cell) {
-        nmsk(i,j,k) = crse_node;
-    }
-    else if (s == 8*fine_cell) {
-        nmsk(i,j,k) = fine_node;
-    } else {
-        nmsk(i,j,k) = crse_fine_node;
-    }
-}
-
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void mlndlap_set_dirichlet_mask (Box const& bx, Array4<int> const& dmsk,
-                                 Array4<int const> const& omsk, Box const& dom,
-                                 GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bclo,
-                                 GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bchi) noexcept
-{
-    const auto lo = amrex::lbound(bx);
-    const auto hi = amrex::ubound(bx);
-    for (int k = lo.z; k <= hi.z; ++k) {
-    for (int j = lo.y; j <= hi.y; ++j) {
-    AMREX_PRAGMA_SIMD
-    for (int i = lo.x; i <= hi.x; ++i) {
-        if (!dmsk(i,j,k)) {
-            dmsk(i,j,k) = (omsk(i-1,j-1,k-1) == 1 || omsk(i,j-1,k-1) == 1 ||
-                           omsk(i-1,j  ,k-1) == 1 || omsk(i,j  ,k-1) == 1 ||
-                           omsk(i-1,j-1,k  ) == 1 || omsk(i,j-1,k  ) == 1 ||
-                           omsk(i-1,j  ,k  ) == 1 || omsk(i,j  ,k  ) == 1);
-        }
-    }}}
-
-    const auto domlo = amrex::lbound(dom);
-    const auto domhi = amrex::ubound(dom);
-
-    if (bclo[0] == LinOpBCType::Dirichlet && lo.x == domlo.x) {
-        for (int k = lo.z; k <= hi.z; ++k) {
-        for (int j = lo.y; j <= hi.y; ++j) {
-            dmsk(lo.x,j,k) = 1;
-        }}
-    }
-
-    if (bchi[0] == LinOpBCType::Dirichlet && hi.x == domhi.x) {
-        for (int k = lo.z; k <= hi.z; ++k) {
-        for (int j = lo.y; j <= hi.y; ++j) {
-            dmsk(hi.x,j,k) = 1;
-        }}
-    }
-
-    if (bclo[1] == LinOpBCType::Dirichlet && lo.y == domlo.y) {
-        for (int k = lo.z; k <= hi.z; ++k) {
-        AMREX_PRAGMA_SIMD
-        for (int i = lo.x; i <= hi.x; ++i) {
-            dmsk(i,lo.y,k) = 1;
-        }}
-    }
-
-    if (bchi[1] == LinOpBCType::Dirichlet && hi.y == domhi.y) {
-        for (int k = lo.z; k <= hi.z; ++k) {
-        AMREX_PRAGMA_SIMD
-        for (int i = lo.x; i <= hi.x; ++i) {
-            dmsk(i,hi.y,k) = 1;
-        }}
-    }
-
-    if (bclo[2] == LinOpBCType::Dirichlet && lo.z == domlo.z) {
-        for (int j = lo.y; j <= hi.y; ++j) {
-        AMREX_PRAGMA_SIMD
-        for (int i = lo.x; i <= hi.x; ++i) {
-            dmsk(i,j,lo.z) = 1;
-        }}
-    }
-
-    if (bchi[2] == LinOpBCType::Dirichlet && hi.z == domhi.z) {
-        for (int j = lo.y; j <= hi.y; ++j) {
-        AMREX_PRAGMA_SIMD
-        for (int i = lo.x; i <= hi.x; ++i) {
-            dmsk(i,j,hi.z) = 1;
-        }}
-    }
-}
-
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void mlndlap_set_dot_mask (Box const& bx, Array4<Real> const& dmsk,
-                           Array4<int const> const& omsk, Box const& dom,
-                           GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bclo,
-                           GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bchi) noexcept
-{
-    const auto lo = amrex::lbound(bx);
-    const auto hi = amrex::ubound(bx);
-    for (int k = lo.z; k <= hi.z; ++k) {
-    for (int j = lo.y; j <= hi.y; ++j) {
-    AMREX_PRAGMA_SIMD
-    for (int i = lo.x; i <= hi.x; ++i) {
-        dmsk(i,j,k) = static_cast<Real>(omsk(i,j,k));
-    }}}
-
-    const auto domlo = amrex::lbound(dom);
-    const auto domhi = amrex::ubound(dom);
-
-    if ((bclo[0] == LinOpBCType::Neumann || bclo[0] == LinOpBCType::inflow)
-        && lo.x == domlo.x)
-    {
-        for (int k = lo.z; k <= hi.z; ++k) {
-        for (int j = lo.y; j <= hi.y; ++j) {
-            dmsk(lo.x,j,k) *= Real(0.5);
-        }}
-    }
-
-    if ((bchi[0] == LinOpBCType::Neumann || bchi[0] == LinOpBCType::inflow)
-        && hi.x == domhi.x)
-    {
-        for (int k = lo.z; k <= hi.z; ++k) {
-        for (int j = lo.y; j <= hi.y; ++j) {
-            dmsk(hi.x,j,k) *= Real(0.5);
-        }}
-    }
-
-    if ((bclo[1] == LinOpBCType::Neumann || bclo[1] == LinOpBCType::inflow)
-        && lo.y == domlo.y)
-    {
-        for (int k = lo.z; k <= hi.z; ++k) {
-        AMREX_PRAGMA_SIMD
-        for (int i = lo.x; i <= hi.x; ++i) {
-            dmsk(i,lo.y,k) *= Real(0.5);
-        }}
-    }
-
-    if ((bchi[1] == LinOpBCType::Neumann || bchi[1] == LinOpBCType::inflow)
-        && hi.y == domhi.y)
-    {
-        for (int k = lo.z; k <= hi.z; ++k) {
-        AMREX_PRAGMA_SIMD
-        for (int i = lo.x; i <= hi.x; ++i) {
-            dmsk(i,hi.y,k) *= Real(0.5);
-        }}
-    }
-
-    if ((bclo[2] == LinOpBCType::Neumann || bclo[2] == LinOpBCType::inflow)
-        && lo.z == domlo.z)
-    {
-        for (int j = lo.y; j <= hi.y; ++j) {
-        AMREX_PRAGMA_SIMD
-        for (int i = lo.x; i <= hi.x; ++i) {
-            dmsk(i,j,lo.z) *= Real(0.5);
-        }}
-    }
-
-    if ((bchi[2] == LinOpBCType::Neumann || bchi[2] == LinOpBCType::inflow)
-        && hi.z == domhi.z)
-    {
-        for (int j = lo.y; j <= hi.y; ++j) {
-        AMREX_PRAGMA_SIMD
-        for (int i = lo.x; i <= hi.x; ++i) {
-            dmsk(i,j,hi.z) *= Real(0.5);
-        }}
-    }
-}
-
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
 void mlndlap_zero_fine (int i, int j, int k, Array4<Real> const& phi,
                         Array4<int const> const& msk, int fine_flag) noexcept
@@ -249,507 +78,6 @@ void mlndlap_semi_avgdown_coeff (int i, int j, int k, Array4<Real> const& crse,
         crse(i,j,k) = cl*cr/(cl+cr);
     }
 }
-//
-// bc
-//
-
-template <typename T>
-inline void mlndlap_bc_doit (Box const& vbx, Array4<T> const& a, Box const& domain,
-                             GpuArray<bool,AMREX_SPACEDIM> const& bflo,
-                             GpuArray<bool,AMREX_SPACEDIM> const& bfhi) noexcept
-{
-    Box gdomain = domain;
-    for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
-        if (! bflo[idim]) { gdomain.growLo(idim,1); }
-        if (! bfhi[idim]) { gdomain.growHi(idim,1); }
-    }
-
-    if (gdomain.strictly_contains(vbx)) { return; }
-
-    const int offset = domain.cellCentered() ? 0 : 1;
-
-    const auto dlo = amrex::lbound(domain);
-    const auto dhi = amrex::ubound(domain);
-
-    Box const& sbox = amrex::grow(vbx,1);
-    AMREX_HOST_DEVICE_FOR_3D(sbox, i, j, k,
-    {
-        if (! gdomain.contains(IntVect(i,j,k))) {
-            // xlo & ylo & zlo
-            if (i == dlo.x-1 && j == dlo.y-1 && k == dlo.z-1 && (bflo[0] || bflo[1] || bflo[2]))
-            {
-                if (bflo[0] && bflo[1] && bflo[2])
-                {
-                    a(i,j,k) = a(i+1+offset, j+1+offset, k+1+offset);
-                }
-                else if (bflo[0] && bflo[1])
-                {
-                    a(i,j,k) = a(i+1+offset, j+1+offset, k);
-                }
-                else if (bflo[0] && bflo[2])
-                {
-                    a(i,j,k) = a(i+1+offset, j, k+1+offset);
-                }
-                else if (bflo[1] && bflo[2])
-                {
-                    a(i,j,k) = a(i, j+1+offset, k+1+offset);
-                }
-                else if (bflo[0])
-                {
-                    a(i,j,k) = a(i+1+offset, j, k);
-                }
-                else if (bflo[1])
-                {
-                    a(i,j,k) = a(i, j+1+offset, k);
-                }
-                else if (bflo[2])
-                {
-                    a(i,j,k) = a(i, j, k+1+offset);
-                }
-            }
-            // xhi & ylo & zlo
-            else if (i == dhi.x+1 && j == dlo.y-1 && k == dlo.z-1 && (bfhi[0] || bflo[1] || bflo[2]))
-            {
-                if (bfhi[0] && bflo[1] && bflo[2])
-                {
-                    a(i,j,k) = a(i-1-offset, j+1+offset, k+1+offset);
-                }
-                else if (bfhi[0] && bflo[1])
-                {
-                    a(i,j,k) = a(i-1-offset, j+1+offset, k);
-                }
-                else if (bfhi[0] && bflo[2])
-                {
-                    a(i,j,k) = a(i-1-offset, j, k+1+offset);
-                }
-                else if (bflo[1] && bflo[2])
-                {
-                    a(i,j,k) = a(i, j+1+offset, k+1+offset);
-                }
-                else if (bfhi[0])
-                {
-                    a(i,j,k) = a(i-1-offset, j, k);
-                }
-                else if (bflo[1])
-                {
-                    a(i,j,k) = a(i, j+1+offset, k);
-                }
-                else if (bflo[2])
-                {
-                    a(i,j,k) = a(i, j, k+1+offset);
-                }
-            }
-            // xlo & yhi & zlo
-            else if (i == dlo.x-1 && j == dhi.y+1 && k == dlo.z-1 && (bflo[0] || bfhi[1] || bflo[2]))
-            {
-                if (bflo[0] && bfhi[1] && bflo[2])
-                {
-                    a(i,j,k) = a(i+1+offset, j-1-offset, k+1+offset);
-                }
-                else if (bflo[0] && bfhi[1])
-                {
-                    a(i,j,k) = a(i+1+offset, j-1-offset, k);
-                }
-                else if (bflo[0] && bflo[2])
-                {
-                    a(i,j,k) = a(i+1+offset, j, k+1+offset);
-                }
-                else if (bfhi[1] && bflo[2])
-                {
-                    a(i,j,k) = a(i, j-1-offset, k+1+offset);
-                }
-                else if (bflo[0])
-                {
-                    a(i,j,k) = a(i+1+offset, j, k);
-                }
-                else if (bfhi[1])
-                {
-                    a(i,j,k) = a(i, j-1-offset, k);
-                }
-                else if (bflo[2])
-                {
-                    a(i,j,k) = a(i, j, k+1+offset);
-                }
-            }
-            // xhi & yhi & zlo
-            else if (i == dhi.x+1 && j == dhi.y+1 && k == dlo.z-1 && (bfhi[0] || bfhi[1] || bflo[2]))
-            {
-                if (bfhi[0] && bfhi[1] && bflo[2])
-                {
-                    a(i,j,k) = a(i-1-offset, j-1-offset, k+1+offset);
-                }
-                else if (bfhi[0] && bfhi[1])
-                {
-                    a(i,j,k) = a(i-1-offset, j-1-offset, k);
-                }
-                else if (bfhi[0] && bflo[2])
-                {
-                    a(i,j,k) = a(i-1-offset, j, k+1+offset);
-                }
-                else if (bfhi[1] && bflo[2])
-                {
-                    a(i,j,k) = a(i, j-1-offset, k+1+offset);
-                }
-                else if (bfhi[0])
-                {
-                    a(i,j,k) = a(i-1-offset, j, k);
-                }
-                else if (bfhi[1])
-                {
-                    a(i,j,k) = a(i, j-1-offset, k);
-                }
-                else if (bflo[2])
-                {
-                    a(i,j,k) = a(i, j, k+1+offset);
-                }
-            }
-            // xlo & ylo & zhi
-            else if (i == dlo.x-1 && j == dlo.y-1 && k == dhi.z+1 && (bflo[0] || bflo[1] || bfhi[2]))
-            {
-                if (bflo[0] && bflo[1] && bfhi[2])
-                {
-                    a(i,j,k) = a(i+1+offset, j+1+offset, k-1-offset);
-                }
-                else if (bflo[0] && bflo[1])
-                {
-                    a(i,j,k) = a(i+1+offset, j+1+offset, k);
-                }
-                else if (bflo[0] && bfhi[2])
-                {
-                    a(i,j,k) = a(i+1+offset, j, k-1-offset);
-                }
-                else if (bflo[1] && bfhi[2])
-                {
-                    a(i,j,k) = a(i, j+1+offset, k-1-offset);
-                }
-                else if (bflo[0])
-                {
-                    a(i,j,k) = a(i+1+offset, j, k);
-                }
-                else if (bflo[1])
-                {
-                    a(i,j,k) = a(i, j+1+offset, k);
-                }
-                else if (bfhi[2])
-                {
-                    a(i,j,k) = a(i, j, k-1-offset);
-                }
-            }
-            // xhi & ylo & zhi
-            else if (i == dhi.x+1 && j == dlo.y-1 && k == dhi.z+1 && (bfhi[0] || bflo[1] || bfhi[2]))
-            {
-                if (bfhi[0] && bflo[1] && bfhi[2])
-                {
-                    a(i,j,k) = a(i-1-offset, j+1+offset, k-1-offset);
-                }
-                else if (bfhi[0] && bflo[1])
-                {
-                    a(i,j,k) = a(i-1-offset, j+1+offset, k);
-                }
-                else if (bfhi[0] && bfhi[2])
-                {
-                    a(i,j,k) = a(i-1-offset, j, k-1-offset);
-                }
-                else if (bflo[1] && bfhi[2])
-                {
-                    a(i,j,k) = a(i, j+1+offset, k-1-offset);
-                }
-                else if (bfhi[0])
-                {
-                    a(i,j,k) = a(i-1-offset, j, k);
-                }
-                else if (bflo[1])
-                {
-                    a(i,j,k) = a(i, j+1+offset, k);
-                }
-                else if (bfhi[2])
-                {
-                    a(i,j,k) = a(i, j, k-1-offset);
-                }
-            }
-            // xlo & yhi & zhi
-            else if (i == dlo.x-1 && j == dhi.y+1 && k == dhi.z+1 && (bflo[0] || bfhi[1] || bfhi[2]))
-            {
-                if (bflo[0] && bfhi[1] && bfhi[2])
-                {
-                    a(i,j,k) = a(i+1+offset, j-1-offset, k-1-offset);
-                }
-                else if (bflo[0] && bfhi[1])
-                {
-                    a(i,j,k) = a(i+1+offset, j-1-offset, k);
-                }
-                else if (bflo[0] && bfhi[2])
-                {
-                    a(i,j,k) = a(i+1+offset, j, k-1-offset);
-                }
-                else if (bfhi[1] && bfhi[2])
-                {
-                    a(i,j,k) = a(i, j-1-offset, k-1-offset);
-                }
-                else if (bflo[0])
-                {
-                    a(i,j,k) = a(i+1+offset, j, k);
-                }
-                else if (bfhi[1])
-                {
-                    a(i,j,k) = a(i, j-1-offset, k);
-                }
-                else if (bfhi[2])
-                {
-                    a(i,j,k) = a(i, j, k-1-offset);
-                }
-            }
-            // xhi & yhi & zhi
-            else if (i == dhi.x+1 && j == dhi.y+1 && k == dhi.z+1 && (bfhi[0] || bfhi[1] || bfhi[2]))
-            {
-                if (bfhi[0] && bfhi[1] && bfhi[2])
-                {
-                    a(i,j,k) = a(i-1-offset, j-1-offset, k-1-offset);
-                }
-                else if (bfhi[0] && bfhi[1])
-                {
-                    a(i,j,k) = a(i-1-offset, j-1-offset, k);
-                }
-                else if (bfhi[0] && bfhi[2])
-                {
-                    a(i,j,k) = a(i-1-offset, j, k-1-offset);
-                }
-                else if (bfhi[1] && bfhi[2])
-                {
-                    a(i,j,k) = a(i, j-1-offset, k-1-offset);
-                }
-                else if (bfhi[0])
-                {
-                    a(i,j,k) = a(i-1-offset, j, k);
-                }
-                else if (bfhi[1])
-                {
-                    a(i,j,k) = a(i, j-1-offset, k);
-                }
-                else if (bfhi[2])
-                {
-                    a(i,j,k) = a(i, j, k-1-offset);
-                }
-            }
-            // xlo & ylo
-            else if (i == dlo.x-1 && j == dlo.y-1 && (bflo[0] || bflo[1]))
-            {
-                if (bflo[0] && bflo[1])
-                {
-                    a(i,j,k) = a(i+1+offset, j+1+offset, k);
-                }
-                else if (bflo[0])
-                {
-                    a(i,j,k) = a(i+1+offset, j, k);
-                }
-                else if (bflo[1])
-                {
-                    a(i,j,k) = a(i, j+1+offset, k);
-                }
-            }
-            // xhi & ylo
-            else if (i == dhi.x+1 && j == dlo.y-1 && (bfhi[0] || bflo[1]))
-            {
-                if (bfhi[0] && bflo[1])
-                {
-                    a(i,j,k) = a(i-1-offset, j+1+offset, k);
-                }
-                else if (bfhi[0])
-                {
-                    a(i,j,k) = a(i-1-offset, j, k);
-                }
-                else if (bflo[1])
-                {
-                    a(i,j,k) = a(i, j+1+offset, k);
-                }
-            }
-            // xlo & yhi
-            else if (i == dlo.x-1 && j == dhi.y+1 && (bflo[0] || bfhi[1]))
-            {
-                if (bflo[0] && bfhi[1])
-                {
-                    a(i,j,k) = a(i+1+offset, j-1-offset, k);
-                }
-                else if (bflo[0])
-                {
-                    a(i,j,k) = a(i+1+offset, j, k);
-                }
-                else if (bfhi[1])
-                {
-                    a(i,j,k) = a(i, j-1-offset, k);
-                }
-            }
-            // xhi & yhi
-            else if (i == dhi.x+1 && j == dhi.y+1 && (bfhi[0] || bfhi[1]))
-            {
-                if (bfhi[0] && bfhi[1])
-                {
-                    a(i,j,k) = a(i-1-offset, j-1-offset, k);
-                }
-                else if (bfhi[0])
-                {
-                    a(i,j,k) = a(i-1-offset, j, k);
-                }
-                else if (bfhi[1])
-                {
-                    a(i,j,k) = a(i, j-1-offset, k);
-                }
-            }
-            // xlo & zlo
-            else if (i == dlo.x-1 && k == dlo.z-1 && (bflo[0] || bflo[2]))
-            {
-                if (bflo[0] && bflo[2])
-                {
-                    a(i,j,k) = a(i+1+offset, j, k+1+offset);
-                }
-                else if (bflo[0])
-                {
-                    a(i,j,k) = a(i+1+offset, j, k);
-                }
-                else if (bflo[2])
-                {
-                    a(i,j,k) = a(i, j, k+1+offset);
-                }
-            }
-            // xhi & zlo
-            else if (i == dhi.x+1 && k == dlo.z-1 && (bfhi[0] || bflo[2]))
-            {
-                if (bfhi[0] && bflo[2])
-                {
-                    a(i,j,k) = a(i-1-offset, j, k+1+offset);
-                }
-                else if (bfhi[0])
-                {
-                    a(i,j,k) = a(i-1-offset, j, k);
-                }
-                else if (bflo[2])
-                {
-                    a(i,j,k) = a(i, j, k+1+offset);
-                }
-            }
-            // xlo & zhi
-            else if (i == dlo.x-1 && k == dhi.z+1 && (bflo[0] || bfhi[2]))
-            {
-                if (bflo[0] && bfhi[2])
-                {
-                    a(i,j,k) = a(i+1+offset, j, k-1-offset);
-                }
-                else if (bflo[0])
-                {
-                    a(i,j,k) = a(i+1+offset, j, k);
-                }
-                else if (bfhi[2])
-                {
-                    a(i,j,k) = a(i, j, k-1-offset);
-                }
-            }
-            // xhi & zhi
-            else if (i == dhi.x+1 && k == dhi.z+1 && (bfhi[0] || bfhi[2]))
-            {
-                if (bfhi[0] && bfhi[2])
-                {
-                    a(i,j,k) = a(i-1-offset, j, k-1-offset);
-                }
-                else if (bfhi[0])
-                {
-                    a(i,j,k) = a(i-1-offset, j, k);
-                }
-                else if (bfhi[2])
-                {
-                    a(i,j,k) = a(i, j, k-1-offset);
-                }
-            }
-            // ylo & zlo
-            else if (j == dlo.y-1 && k == dlo.z-1 && (bflo[1] || bflo[2]))
-            {
-                if (bflo[1] && bflo[2])
-                {
-                    a(i,j,k) = a(i, j+1+offset, k+1+offset);
-                }
-                else if (bflo[1])
-                {
-                    a(i,j,k) = a(i, j+1+offset, k);
-                }
-                else if (bflo[2])
-                {
-                    a(i,j,k) = a(i, j, k+1+offset);
-                }
-            }
-            // yhi & zlo
-            else if (j == dhi.y+1 && k == dlo.z-1 && (bfhi[1] || bflo[2]))
-            {
-                if (bfhi[1] && bflo[2])
-                {
-                    a(i,j,k) = a(i, j-1-offset, k+1+offset);
-                }
-                else if (bfhi[1])
-                {
-                    a(i,j,k) = a(i, j-1-offset, k);
-                }
-                else if (bflo[2])
-                {
-                    a(i,j,k) = a(i, j, k+1+offset);
-                }
-            }
-            // ylo & zhi
-            else if (j == dlo.y-1 && k == dhi.z+1 && (bflo[1] || bfhi[2]))
-            {
-                if (bflo[1] && bfhi[2])
-                {
-                    a(i,j,k) = a(i, j+1+offset, k-1-offset);
-                }
-                else if (bflo[1])
-                {
-                    a(i,j,k) = a(i, j+1+offset, k);
-                }
-                else if (bfhi[2])
-                {
-                    a(i,j,k) = a(i, j, k-1-offset);
-                }
-            }
-            // yhi & zhi
-            else if (j == dhi.y+1 && k == dhi.z+1 && (bfhi[1] || bfhi[2]))
-            {
-                if (bfhi[1] && bfhi[2])
-                {
-                    a(i,j,k) = a(i, j-1-offset, k-1-offset);
-                }
-                else if (bfhi[1])
-                {
-                    a(i,j,k) = a(i, j-1-offset, k);
-                }
-                else if (bfhi[2])
-                {
-                    a(i,j,k) = a(i, j, k-1-offset);
-                }
-            }
-            else if (i == dlo.x-1 && bflo[0])
-            {
-                a(i,j,k) = a(i+1+offset, j, k);
-            }
-            else if (i == dhi.x+1 && bfhi[0])
-            {
-                a(i,j,k) = a(i-1-offset, j, k);
-            }
-            else if (j == dlo.y-1 && bflo[1])
-            {
-                a(i,j,k) = a(i, j+1+offset, k);
-            }
-            else if (j == dhi.y+1 && bfhi[1])
-            {
-                a(i,j,k) = a(i, j-1-offset, k);
-            }
-            else if (k == dlo.z-1 && bflo[2])
-            {
-                a(i,j,k) = a(i, j, k+1+offset);
-            }
-            else if (k == dhi.z+1 && bfhi[2])
-            {
-                a(i,j,k) = a(i, j, k-1-offset);
-            }
-        }
-    });
-}
 
 //
 // operator
@@ -1587,138 +915,6 @@ void mlndlap_gauss_seidel_with_line_solve_aa (Box const& bx, Array4<Real> const&
     }
 }
 
-//
-// restriction
-//
-
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void mlndlap_restriction (int i, int j, int k, Array4<Real> const& crse,
-                          Array4<Real const> const& fine, Array4<int const> const& msk) noexcept
-{
-    int ii = i*2;
-    int jj = j*2;
-    int kk = k*2;
-    if (msk(ii,jj,kk)) {
-        crse(i,j,k) = Real(0.0);
-    } else {
-        crse(i,j,k) = Real(1./64.)*(fine(ii-1,jj-1,kk-1)+fine(ii+1,jj-1,kk-1)
-                                   +fine(ii-1,jj+1,kk-1)+fine(ii+1,jj+1,kk-1)
-                                   +fine(ii-1,jj-1,kk+1)+fine(ii+1,jj-1,kk+1)
-                                   +fine(ii-1,jj+1,kk+1)+fine(ii+1,jj+1,kk+1))
-                    + Real(1./32.)*(fine(ii  ,jj-1,kk-1)+fine(ii  ,jj+1,kk-1)
-                                   +fine(ii  ,jj-1,kk+1)+fine(ii  ,jj+1,kk+1)
-                                   +fine(ii-1,jj  ,kk-1)+fine(ii+1,jj  ,kk-1)
-                                   +fine(ii-1,jj  ,kk+1)+fine(ii+1,jj  ,kk+1)
-                                   +fine(ii-1,jj-1,kk  )+fine(ii+1,jj-1,kk  )
-                                   +fine(ii-1,jj+1,kk  )+fine(ii+1,jj+1,kk  ))
-                    + Real(1./16.)*(fine(ii-1,jj,kk)+fine(ii+1,jj,kk)
-                                   +fine(ii,jj-1,kk)+fine(ii,jj+1,kk)
-                                   +fine(ii,jj,kk-1)+fine(ii,jj,kk+1))
-                      + Real(1./8.)*fine(ii,jj,kk);
-    }
-}
-
-template <int rr>
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void mlndlap_restriction (int i, int j, int k, Array4<Real> const& crse,
-                          Array4<Real const> const& fine, Array4<int const> const& msk,
-                          Box const& fdom,
-                          GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bclo,
-                          GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bchi) noexcept
-{
-    const int ii = i*rr;
-    const int jj = j*rr;
-    const int kk = k*rr;
-    if (msk(ii,jj,kk)) {
-        crse(i,j,k) = Real(0.0);
-    } else {
-        const auto ndlo = amrex::lbound(fdom);
-        const auto ndhi = amrex::ubound(fdom);
-        Real tmp = Real(0.0);
-        for (int koff = -rr+1; koff <= rr-1; ++koff) {
-            Real wz = rr - std::abs(koff);
-            for (int joff = -rr+1; joff <= rr-1; ++joff) {
-                Real wy = rr - std::abs(joff);
-                for (int ioff = -rr+1; ioff <= rr-1; ++ioff) {
-                    Real wx = rr - std::abs(ioff);
-                    int itmp = ii + ioff;
-                    int jtmp = jj + joff;
-                    int ktmp = kk + koff;
-                    if ((itmp < ndlo.x && (bclo[0] == LinOpBCType::Neumann ||
-                                           bclo[0] == LinOpBCType::inflow)) ||
-                        (itmp > ndhi.x && (bchi[0] == LinOpBCType::Neumann ||
-                                           bchi[0] == LinOpBCType::inflow))) {
-                        itmp = ii - ioff;
-                    }
-                    if ((jtmp < ndlo.y && (bclo[1] == LinOpBCType::Neumann ||
-                                           bclo[1] == LinOpBCType::inflow)) ||
-                        (jtmp > ndhi.y && (bchi[1] == LinOpBCType::Neumann ||
-                                           bchi[1] == LinOpBCType::inflow))) {
-                        jtmp = jj - joff;
-                    }
-                    if ((ktmp < ndlo.z && (bclo[2] == LinOpBCType::Neumann ||
-                                           bclo[2] == LinOpBCType::inflow)) ||
-                        (ktmp > ndhi.z && (bchi[2] == LinOpBCType::Neumann ||
-                                           bchi[2] == LinOpBCType::inflow))) {
-                        ktmp = kk - koff;
-                    }
-                    tmp += wx*wy*wz*fine(itmp,jtmp,ktmp);
-                }
-            }
-        }
-        crse(i,j,k) = tmp/Real(rr*rr*rr*rr*rr*rr);
-    }
-}
-
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void mlndlap_semi_restriction (int i, int j, int k, Array4<Real> const& crse,
-                               Array4<Real const> const& fine, Array4<int const> const& msk, int idir) noexcept
-{
-    if (idir == 2)
-    {
-        int ii = i*2;
-        int jj = j*2;
-        int kk = k;
-        if (msk(ii,jj,kk)) {
-            crse(i,j,k) = Real(0.0);
-        } else { // use 2-D formula
-            crse(i,j,k) = Real(1./16.)*(fine(ii-1,jj-1,kk) + Real(2.)*fine(ii  ,jj-1,kk) +          fine(ii+1,jj-1,kk)
-                             + Real(2.)*fine(ii-1,jj  ,kk) + Real(4.)*fine(ii  ,jj  ,kk) + Real(2.)*fine(ii+1,jj  ,kk)
-                                      + fine(ii-1,jj+1,kk) + Real(2.)*fine(ii  ,jj+1,kk) +          fine(ii+1,jj+1,kk));
-        }
-    }
-    else if (idir == 1)
-    {
-        int ii = i*2;
-        int jj = j;
-        int kk = k*2;
-        if (msk(ii,jj,kk)) {
-            crse(i,j,k) = Real(0.0);
-        } else { // use 2-D formula
-            crse(i,j,k) = Real(1./16.)*(fine(ii-1,jj,kk-1) + Real(2.)*fine(ii  ,jj,kk-1) +          fine(ii+1,jj,kk-1)
-                             + Real(2.)*fine(ii-1,jj  ,kk) + Real(4.)*fine(ii  ,jj,kk  ) + Real(2.)*fine(ii+1,jj,kk  )
-                                      + fine(ii-1,jj,kk+1) + Real(2.)*fine(ii  ,jj,kk+1) +          fine(ii+1,jj,kk+1));
-        }
-    }
-    else if (idir == 0)
-    {
-        int ii = i;
-        int jj = j*2;
-        int kk = k*2;
-        if (msk(ii,jj,kk)) {
-            crse(i,j,k) = Real(0.0);
-        } else { // use 2-D formula
-            crse(i,j,k) = Real(1./16.)*(fine(ii,jj-1,kk-1) + Real(2.)*fine(ii  ,jj,kk-1) +          fine(ii,jj+1,kk-1)
-                             + Real(2.)*fine(ii,jj-1  ,kk) + Real(4.)*fine(ii  ,jj,kk  ) + Real(2.)*fine(ii,jj+1,kk  )
-                                      + fine(ii,jj-1,kk+1) + Real(2.)*fine(ii  ,jj,kk+1) +          fine(ii,jj+1,kk+1));
-        }
-    }
-    else
-    {
-        amrex::Abort("mlndlap_semi_restriction semi direction wrong semi-direction. ");
-    }
-}
-
 //
 // interpolation
 //
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_K.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_K.H
index 4e76a486890..f82c94d2743 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_K.H
+++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_K.H
@@ -7,6 +7,7 @@
 #ifdef AMREX_USE_EB
 #include <AMReX_EBCellFlag.H>
 #endif
+#include <AMReX_MLNodeLinOp_K.H>
 
 namespace amrex {
 
@@ -53,11 +54,6 @@ namespace nodelap_detail {
         }
     };
 
-    constexpr int crse_cell = 0; // Do NOT change the values
-    constexpr int fine_cell = 1;
-    constexpr int crse_node = 0;
-    constexpr int crse_fine_node = 1;
-    constexpr int fine_node = 2;
 #if (BL_USE_FLOAT)
     constexpr float eps = 1.e-30_rt;
 #else
@@ -123,40 +119,6 @@ mlndlap_unimpose_neumann_bc (Box const& bx, Array4<Real> const& rhs, Box const&
 
 namespace amrex {
 
-template <typename T>
-void mlndlap_fillbc_cc (Box const& vbx, Array4<T> const& sigma, Box const& domain,
-                        GpuArray<LinOpBCType, AMREX_SPACEDIM> bclo,
-                        GpuArray<LinOpBCType, AMREX_SPACEDIM> bchi) noexcept
-{
-    GpuArray<bool,AMREX_SPACEDIM> bflo{{AMREX_D_DECL(bclo[0] != LinOpBCType::Periodic,
-                                                     bclo[1] != LinOpBCType::Periodic,
-                                                     bclo[2] != LinOpBCType::Periodic)}};
-    GpuArray<bool,AMREX_SPACEDIM> bfhi{{AMREX_D_DECL(bchi[0] != LinOpBCType::Periodic,
-                                                     bchi[1] != LinOpBCType::Periodic,
-                                                     bchi[2] != LinOpBCType::Periodic)}};
-    mlndlap_bc_doit(vbx, sigma, domain, bflo, bfhi);
-}
-
-template <typename T>
-void mlndlap_applybc (Box const& vbx, Array4<T> const& phi, Box const& domain,
-                      GpuArray<LinOpBCType, AMREX_SPACEDIM> bclo,
-                      GpuArray<LinOpBCType, AMREX_SPACEDIM> bchi) noexcept
-{
-    GpuArray<bool,AMREX_SPACEDIM> bflo{{AMREX_D_DECL(bclo[0] == LinOpBCType::Neumann ||
-                                                     bclo[0] == LinOpBCType::inflow,
-                                                     bclo[1] == LinOpBCType::Neumann ||
-                                                     bclo[1] == LinOpBCType::inflow,
-                                                     bclo[2] == LinOpBCType::Neumann ||
-                                                     bclo[2] == LinOpBCType::inflow)}};
-    GpuArray<bool,AMREX_SPACEDIM> bfhi{{AMREX_D_DECL(bchi[0] == LinOpBCType::Neumann ||
-                                                     bchi[0] == LinOpBCType::inflow,
-                                                     bchi[1] == LinOpBCType::Neumann ||
-                                                     bchi[1] == LinOpBCType::inflow,
-                                                     bchi[2] == LinOpBCType::Neumann ||
-                                                     bchi[2] == LinOpBCType::inflow)}};
-    mlndlap_bc_doit(vbx, phi, domain, bflo, bfhi);
-}
-
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
 void mlndlap_normalize_sten (int i, int j, int k, Array4<Real> const& x,
                              Array4<Real const> const& sten,
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp
index 38f58b70bbb..929d05dc5a1 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp
+++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp
@@ -1,6 +1,6 @@
 
 #include <AMReX_MLNodeLinOp.H>
-#include <AMReX_MLNodeLap_K.H>
+#include <AMReX_MLNodeLinOp_K.H>
 #include <AMReX_MLMG_K.H>
 #include <AMReX_MultiFabUtil.H>
 
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp_1D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp_1D_K.H
new file mode 100644
index 00000000000..b842dd81b88
--- /dev/null
+++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp_1D_K.H
@@ -0,0 +1,167 @@
+#ifndef AMREX_ML_NODE_LINOP_1D_K_H_
+#define AMREX_ML_NODE_LINOP_1D_K_H_
+#include <AMReX_Config.H>
+
+namespace amrex {
+
+template <typename T>
+void mlndlap_bc_doit (Box const& vbx, Array4<T> const& a, Box const& domain,
+                      GpuArray<bool,AMREX_SPACEDIM> const& bflo,
+                      GpuArray<bool,AMREX_SPACEDIM> const& bfhi) noexcept
+{
+    Box gdomain = domain;
+    int const idim = 0;
+    if (! bflo[idim]) { gdomain.growLo(idim,1); }
+    if (! bfhi[idim]) { gdomain.growHi(idim,1); }
+
+    if (gdomain.strictly_contains(vbx)) { return; }
+
+    const int offset = domain.cellCentered() ? 0 : 1;
+
+    const auto dlo = domain.smallEnd(0);
+    const auto dhi = domain.bigEnd(0);
+
+    Box const& sbox = amrex::grow(vbx,1);
+    AMREX_HOST_DEVICE_FOR_3D(sbox, i, j, k,
+    {
+        if (! gdomain.contains(IntVect(i))) {
+            if (i == dlo-1 && bflo[0])
+            {
+                a(i,0,0) = a(i+1+offset, j, k);
+            }
+            else if (i == dhi+1 && bfhi[0])
+            {
+                a(i,0,0) = a(i-1-offset, j, k);
+            }
+        }
+    });
+}
+
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+void mlndlap_restriction (int i, int, int, Array4<Real> const& crse,
+                          Array4<Real const> const& fine, Array4<int const> const& msk) noexcept
+{
+    int ii = i*2;
+    if (msk(ii,0,0)) {
+        crse(i,0,0) = Real(0.0);
+    } else {
+        crse(i,0,0) = Real(1./4.)  *(fine(ii-1,0,0)
+                         + Real(2.)* fine(ii  ,0,0)
+                         +           fine(ii+1,0,0));
+    }
+}
+
+template <int rr>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+void mlndlap_restriction (int i, int, int, Array4<Real> const& crse,
+                          Array4<Real const> const& fine, Array4<int const> const& msk,
+                          Box const& fdom,
+                          GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bclo,
+                          GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bchi) noexcept
+
+{
+    const int ii = i*rr;
+    if (msk(ii,0,0)) {
+        crse(i,0,0) = Real(0.0);
+    } else {
+        const auto ndlo = fdom.smallEnd(0);
+        const auto ndhi = fdom.bigEnd(0);
+        Real tmp = Real(0.0);
+        for (int ioff = -rr+1; ioff <= rr-1; ++ioff) {
+            Real wx = rr - std::abs(ioff);
+            int itmp = ii + ioff;
+            if ((itmp < ndlo && (bclo[0] == LinOpBCType::Neumann ||
+                                 bclo[0] == LinOpBCType::inflow)) ||
+                (itmp > ndhi && (bchi[0] == LinOpBCType::Neumann ||
+                                 bchi[0] == LinOpBCType::inflow))) {
+                itmp = ii - ioff;
+            }
+            tmp += wx*fine(itmp,0,0);
+        }
+        crse(i,0,0) = tmp*(Real(1.0)/Real(rr*rr));
+    }
+}
+
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+void mlndlap_semi_restriction (int /*i*/, int /*j*/, int /*k*/, Array4<Real> const&,
+                          Array4<Real const> const&, Array4<int const> const&, int) noexcept
+{
+    amrex::Abort("mlndlap_semi_restriction: not implemented in 1D");
+}
+
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+void mlndlap_set_nodal_mask (int i, int, int, Array4<int> const& nmsk,
+                             Array4<int const> const& cmsk) noexcept
+{
+    using namespace nodelap_detail;
+
+    int s = cmsk(i-1,0,0) + cmsk(i,0,0);
+    if (s == 2*crse_cell) {
+        nmsk(i,0,0) = crse_node;
+    } else if (s == 2*fine_cell) {
+        nmsk(i,0,0) = fine_node;
+    } else {
+        nmsk(i,0,0) = crse_fine_node;
+    }
+}
+
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+void mlndlap_set_dirichlet_mask (Box const& bx, Array4<int> const& dmsk,
+                                 Array4<int const> const& omsk, Box const& dom,
+                                 GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bclo,
+                                 GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bchi) noexcept
+{
+    const auto lo = bx.smallEnd(0);
+    const auto hi = bx.bigEnd(0);
+    AMREX_PRAGMA_SIMD
+    for (int i = lo; i <= hi; ++i) {
+        if (!dmsk(i,0,0)) {
+            dmsk(i,0,0) = (omsk(i-1,0,0) == 1 || omsk(i,0,0) == 1);
+        }
+    }
+
+    const auto domlo = dom.smallEnd(0);
+    const auto domhi = dom.bigEnd(0);
+
+    if (bclo[0] == LinOpBCType::Dirichlet && lo == domlo) {
+        dmsk(lo,0,0) = 1;
+    }
+
+    if (bchi[0] == LinOpBCType::Dirichlet && hi == domhi) {
+        dmsk(hi,0,0) = 1;
+    }
+}
+
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+void mlndlap_set_dot_mask (Box const& bx, Array4<Real> const& dmsk,
+                           Array4<int const> const& omsk, Box const& dom,
+                           GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bclo,
+                           GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bchi) noexcept
+{
+    const auto lo = bx.smallEnd(0);
+    const auto hi = bx.bigEnd(0);
+
+    AMREX_PRAGMA_SIMD
+    for (int i = lo; i <= hi; ++i) {
+        dmsk(i,0,0) = static_cast<Real>(omsk(i,0,0));
+    }
+
+    const auto domlo = dom.smallEnd(0);
+    const auto domhi = dom.bigEnd(0);
+
+    if ((bclo[0] == LinOpBCType::Neumann || bclo[0] == LinOpBCType::inflow)
+        && lo == domlo)
+    {
+        dmsk(lo,0,0) *= Real(0.5);
+    }
+
+    if ((bchi[0] == LinOpBCType::Neumann || bchi[0] == LinOpBCType::inflow)
+        && hi == domhi)
+    {
+        dmsk(hi,0,0) *= Real(0.5);
+    }
+}
+
+}
+
+#endif
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp_2D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp_2D_K.H
new file mode 100644
index 00000000000..3d8746cf058
--- /dev/null
+++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp_2D_K.H
@@ -0,0 +1,317 @@
+#ifndef AMREX_ML_NODE_LINOP_2D_K_H_
+#define AMREX_ML_NODE_LINOP_2D_K_H_
+#include <AMReX_Config.H>
+
+namespace amrex {
+
+template <typename T>
+void mlndlap_bc_doit (Box const& vbx, Array4<T> const& a, Box const& domain,
+                      GpuArray<bool,AMREX_SPACEDIM> const& bflo,
+                      GpuArray<bool,AMREX_SPACEDIM> const& bfhi) noexcept
+{
+    Box gdomain = domain;
+    for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
+        if (! bflo[idim]) { gdomain.growLo(idim,1); }
+        if (! bfhi[idim]) { gdomain.growHi(idim,1); }
+    }
+
+    if (gdomain.strictly_contains(vbx)) { return; }
+
+    const int offset = domain.cellCentered() ? 0 : 1;
+
+    const auto dlo = amrex::lbound(domain);
+    const auto dhi = amrex::ubound(domain);
+
+    Box const& sbox = amrex::grow(vbx,1);
+    AMREX_HOST_DEVICE_FOR_3D(sbox, i, j, k,
+    {
+        if (! gdomain.contains(IntVect(i,j))) {
+            // xlo & ylo
+            if (i == dlo.x-1 && j == dlo.y-1 && (bflo[0] || bflo[1]))
+            {
+                if (bflo[0] && bflo[1])
+                {
+                    a(i,j,k) = a(i+1+offset, j+1+offset, k);
+                }
+                else if (bflo[0])
+                {
+                    a(i,j,k) = a(i+1+offset, j, k);
+                }
+                else if (bflo[1])
+                {
+                    a(i,j,k) = a(i, j+1+offset, k);
+                }
+            }
+            // xhi & ylo
+            else if (i == dhi.x+1 && j == dlo.y-1 && (bfhi[0] || bflo[1]))
+            {
+                if (bfhi[0] && bflo[1])
+                {
+                    a(i,j,k) = a(i-1-offset, j+1+offset, k);
+                }
+                else if (bfhi[0])
+                {
+                    a(i,j,k) = a(i-1-offset, j, k);
+                }
+                else if (bflo[1])
+                {
+                    a(i,j,k) = a(i, j+1+offset, k);
+                }
+            }
+            // xlo & yhi
+            else if (i == dlo.x-1 && j == dhi.y+1 && (bflo[0] || bfhi[1]))
+            {
+                if (bflo[0] && bfhi[1])
+                {
+                    a(i,j,k) = a(i+1+offset, j-1-offset, k);
+                }
+                else if (bflo[0])
+                {
+                    a(i,j,k) = a(i+1+offset, j, k);
+                }
+                else if (bfhi[1])
+                {
+                    a(i,j,k) = a(i, j-1-offset, k);
+                }
+            }
+            // xhi & yhi
+            else if (i == dhi.x+1 && j == dhi.y+1 && (bfhi[0] || bfhi[1]))
+            {
+                if (bfhi[0] && bfhi[1])
+                {
+                    a(i,j,k) = a(i-1-offset, j-1-offset, k);
+                }
+                else if (bfhi[0])
+                {
+                    a(i,j,k) = a(i-1-offset, j, k);
+                }
+                else if (bfhi[1])
+                {
+                    a(i,j,k) = a(i, j-1-offset, k);
+                }
+            }
+            else if (i == dlo.x-1 && bflo[0])
+            {
+                a(i,j,k) = a(i+1+offset, j, k);
+            }
+            else if (i == dhi.x+1 && bfhi[0])
+            {
+                a(i,j,k) = a(i-1-offset, j, k);
+            }
+            else if (j == dlo.y-1 && bflo[1])
+            {
+                a(i,j,k) = a(i, j+1+offset, k);
+            }
+            else if (j == dhi.y+1 && bfhi[1])
+            {
+                a(i,j,k) = a(i, j-1-offset, k);
+            }
+        }
+    });
+}
+
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+void mlndlap_restriction (int i, int j, int k, Array4<Real> const& crse,
+                          Array4<Real const> const& fine, Array4<int const> const& msk) noexcept
+{
+    int ii = i*2;
+    int jj = j*2;
+    int kk = 0;
+    if (msk(ii,jj,kk)) {
+        crse(i,j,k) = Real(0.0);
+    } else {
+        crse(i,j,k) = Real(1./16.)*(fine(ii-1,jj-1,kk) + Real(2.)*fine(ii  ,jj-1,kk) +          fine(ii+1,jj-1,kk)
+                         + Real(2.)*fine(ii-1,jj  ,kk) + Real(4.)*fine(ii  ,jj  ,kk) + Real(2.)*fine(ii+1,jj  ,kk)
+                                  + fine(ii-1,jj+1,kk) + Real(2.)*fine(ii  ,jj+1,kk) +          fine(ii+1,jj+1,kk));
+    }
+}
+
+template <int rr>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+void mlndlap_restriction (int i, int j, int k, Array4<Real> const& crse,
+                          Array4<Real const> const& fine, Array4<int const> const& msk,
+                          Box const& fdom,
+                          GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bclo,
+                          GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bchi) noexcept
+{
+    const int ii = i*rr;
+    const int jj = j*rr;
+    if (msk(ii,jj,0)) {
+        crse(i,j,k) = Real(0.0);
+    } else {
+        const auto ndlo = amrex::lbound(fdom);
+        const auto ndhi = amrex::ubound(fdom);
+        Real tmp = Real(0.0);
+        for (int joff = -rr+1; joff <= rr-1; ++joff) {
+            Real wy = rr - std::abs(joff);
+            for (int ioff = -rr+1; ioff <= rr-1; ++ioff) {
+                Real wx = rr - std::abs(ioff);
+                int itmp = ii + ioff;
+                int jtmp = jj + joff;
+                if ((itmp < ndlo.x && (bclo[0] == LinOpBCType::Neumann ||
+                                       bclo[0] == LinOpBCType::inflow)) ||
+                    (itmp > ndhi.x && (bchi[0] == LinOpBCType::Neumann ||
+                                       bchi[0] == LinOpBCType::inflow))) {
+                    itmp = ii - ioff;
+                }
+                if ((jtmp < ndlo.y && (bclo[1] == LinOpBCType::Neumann ||
+                                       bclo[1] == LinOpBCType::inflow)) ||
+                    (jtmp > ndhi.y && (bchi[1] == LinOpBCType::Neumann ||
+                                       bchi[1] == LinOpBCType::inflow))) {
+                    jtmp = jj - joff;
+                }
+                tmp += wx*wy*fine(itmp,jtmp,0);
+            }
+        }
+        crse(i,j,k) = tmp*(Real(1.0)/Real(rr*rr*rr*rr));
+    }
+}
+
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+void mlndlap_semi_restriction (int i, int j, int k, Array4<Real> const& crse,
+                          Array4<Real const> const& fine, Array4<int const> const& msk, int idir) noexcept
+{
+    int kk = 0;
+    if (idir == 1) {
+        int ii = i*2;
+        int jj = j;
+        if (msk(ii,jj,kk)) {
+            crse(i,j,k) = Real(0.0);
+        } else {
+            crse(i,j,k) = Real(1./4.)*(fine(ii-1,jj,kk) + Real(2.)*fine(ii,jj,kk) + fine(ii+1,jj,kk));
+        }
+    } else if (idir == 0) {
+        int ii = i;
+        int jj = j*2;
+        if (msk(ii,jj,kk)) {
+            crse(i,j,k) = Real(0.0);
+        } else {
+            crse(i,j,k) = Real(1./4.)*(fine(ii,jj-1,kk) + Real(2.)*fine(ii,jj,kk) + fine(ii,jj+1,kk));
+        }
+    }
+}
+
+//
+// masks
+//
+
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+void mlndlap_set_nodal_mask (int i, int j, int k, Array4<int> const& nmsk,
+                             Array4<int const> const& cmsk) noexcept
+{
+    using namespace nodelap_detail;
+
+    int s = cmsk(i-1,j-1,k) + cmsk(i  ,j-1,k)
+        +   cmsk(i-1,j  ,k) + cmsk(i  ,j  ,k);
+    if (s == 4*crse_cell) {
+        nmsk(i,j,k) = crse_node;
+    }
+    else if (s == 4*fine_cell) {
+        nmsk(i,j,k) = fine_node;
+    } else {
+        nmsk(i,j,k) = crse_fine_node;
+    }
+}
+
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+void mlndlap_set_dirichlet_mask (Box const& bx, Array4<int> const& dmsk,
+                                 Array4<int const> const& omsk, Box const& dom,
+                                 GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bclo,
+                                 GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bchi) noexcept
+{
+    const auto lo = amrex::lbound(bx);
+    const auto hi = amrex::ubound(bx);
+    for (int j = lo.y; j <= hi.y; ++j) {
+    AMREX_PRAGMA_SIMD
+    for (int i = lo.x; i <= hi.x; ++i) {
+        if (!dmsk(i,j,0)) {
+            dmsk(i,j,0) = (omsk(i-1,j-1,0) == 1 || omsk(i,j-1,0) == 1 ||
+                           omsk(i-1,j  ,0) == 1 || omsk(i,j  ,0) == 1);
+        }
+    }}
+
+    const auto domlo = amrex::lbound(dom);
+    const auto domhi = amrex::ubound(dom);
+
+    if (bclo[0] == LinOpBCType::Dirichlet && lo.x == domlo.x) {
+        for (int j = lo.y; j <= hi.y; ++j) {
+            dmsk(lo.x,j,0) = 1;
+        }
+    }
+
+    if (bchi[0] == LinOpBCType::Dirichlet && hi.x == domhi.x) {
+        for (int j = lo.y; j <= hi.y; ++j) {
+            dmsk(hi.x,j,0) = 1;
+        }
+    }
+
+    if (bclo[1] == LinOpBCType::Dirichlet && lo.y == domlo.y) {
+        AMREX_PRAGMA_SIMD
+        for (int i = lo.x; i <= hi.x; ++i) {
+            dmsk(i,lo.y,0) = 1;
+        }
+    }
+
+    if (bchi[1] == LinOpBCType::Dirichlet && hi.y == domhi.y) {
+        AMREX_PRAGMA_SIMD
+        for (int i = lo.x; i <= hi.x; ++i) {
+            dmsk(i,hi.y,0) = 1;
+        }
+    }
+}
+
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+void mlndlap_set_dot_mask (Box const& bx, Array4<Real> const& dmsk,
+                           Array4<int const> const& omsk, Box const& dom,
+                           GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bclo,
+                           GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bchi) noexcept
+{
+    const auto lo = amrex::lbound(bx);
+    const auto hi = amrex::ubound(bx);
+    for (int j = lo.y; j <= hi.y; ++j) {
+    AMREX_PRAGMA_SIMD
+    for (int i = lo.x; i <= hi.x; ++i) {
+        dmsk(i,j,0) = static_cast<Real>(omsk(i,j,0));
+    }}
+
+    const auto domlo = amrex::lbound(dom);
+    const auto domhi = amrex::ubound(dom);
+
+    if ((bclo[0] == LinOpBCType::Neumann || bclo[0] == LinOpBCType::inflow)
+        && lo.x == domlo.x)
+    {
+        for (int j = lo.y; j <= hi.y; ++j) {
+            dmsk(lo.x,j,0) *= Real(0.5);
+        }
+    }
+
+    if ((bchi[0] == LinOpBCType::Neumann || bchi[0] == LinOpBCType::inflow)
+        && hi.x == domhi.x)
+    {
+        for (int j = lo.y; j <= hi.y; ++j) {
+            dmsk(hi.x,j,0) *= Real(0.5);
+        }
+    }
+
+    if ((bclo[1] == LinOpBCType::Neumann || bclo[1] == LinOpBCType::inflow)
+        && lo.y == domlo.y)
+    {
+        AMREX_PRAGMA_SIMD
+        for (int i = lo.x; i <= hi.x; ++i) {
+            dmsk(i,lo.y,0) *= Real(0.5);
+        }
+    }
+
+    if ((bchi[1] == LinOpBCType::Neumann || bchi[1] == LinOpBCType::inflow)
+        && hi.y == domhi.y)
+    {
+        AMREX_PRAGMA_SIMD
+        for (int i = lo.x; i <= hi.x; ++i) {
+            dmsk(i,hi.y,0) *= Real(0.5);
+        }
+    }
+}
+
+}
+
+#endif
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp_3D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp_3D_K.H
new file mode 100644
index 00000000000..976a16c7aa5
--- /dev/null
+++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp_3D_K.H
@@ -0,0 +1,810 @@
+#ifndef AMREX_ML_NODE_LINOP_3D_K_H_
+#define AMREX_ML_NODE_LINOP_3D_K_H_
+#include <AMReX_Config.H>
+
+namespace amrex {
+
+template <typename T>
+inline void mlndlap_bc_doit (Box const& vbx, Array4<T> const& a, Box const& domain,
+                             GpuArray<bool,AMREX_SPACEDIM> const& bflo,
+                             GpuArray<bool,AMREX_SPACEDIM> const& bfhi) noexcept
+{
+    Box gdomain = domain;
+    for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
+        if (! bflo[idim]) { gdomain.growLo(idim,1); }
+        if (! bfhi[idim]) { gdomain.growHi(idim,1); }
+    }
+
+    if (gdomain.strictly_contains(vbx)) { return; }
+
+    const int offset = domain.cellCentered() ? 0 : 1;
+
+    const auto dlo = amrex::lbound(domain);
+    const auto dhi = amrex::ubound(domain);
+
+    Box const& sbox = amrex::grow(vbx,1);
+    AMREX_HOST_DEVICE_FOR_3D(sbox, i, j, k,
+    {
+        if (! gdomain.contains(IntVect(i,j,k))) {
+            // xlo & ylo & zlo
+            if (i == dlo.x-1 && j == dlo.y-1 && k == dlo.z-1 && (bflo[0] || bflo[1] || bflo[2]))
+            {
+                if (bflo[0] && bflo[1] && bflo[2])
+                {
+                    a(i,j,k) = a(i+1+offset, j+1+offset, k+1+offset);
+                }
+                else if (bflo[0] && bflo[1])
+                {
+                    a(i,j,k) = a(i+1+offset, j+1+offset, k);
+                }
+                else if (bflo[0] && bflo[2])
+                {
+                    a(i,j,k) = a(i+1+offset, j, k+1+offset);
+                }
+                else if (bflo[1] && bflo[2])
+                {
+                    a(i,j,k) = a(i, j+1+offset, k+1+offset);
+                }
+                else if (bflo[0])
+                {
+                    a(i,j,k) = a(i+1+offset, j, k);
+                }
+                else if (bflo[1])
+                {
+                    a(i,j,k) = a(i, j+1+offset, k);
+                }
+                else if (bflo[2])
+                {
+                    a(i,j,k) = a(i, j, k+1+offset);
+                }
+            }
+            // xhi & ylo & zlo
+            else if (i == dhi.x+1 && j == dlo.y-1 && k == dlo.z-1 && (bfhi[0] || bflo[1] || bflo[2]))
+            {
+                if (bfhi[0] && bflo[1] && bflo[2])
+                {
+                    a(i,j,k) = a(i-1-offset, j+1+offset, k+1+offset);
+                }
+                else if (bfhi[0] && bflo[1])
+                {
+                    a(i,j,k) = a(i-1-offset, j+1+offset, k);
+                }
+                else if (bfhi[0] && bflo[2])
+                {
+                    a(i,j,k) = a(i-1-offset, j, k+1+offset);
+                }
+                else if (bflo[1] && bflo[2])
+                {
+                    a(i,j,k) = a(i, j+1+offset, k+1+offset);
+                }
+                else if (bfhi[0])
+                {
+                    a(i,j,k) = a(i-1-offset, j, k);
+                }
+                else if (bflo[1])
+                {
+                    a(i,j,k) = a(i, j+1+offset, k);
+                }
+                else if (bflo[2])
+                {
+                    a(i,j,k) = a(i, j, k+1+offset);
+                }
+            }
+            // xlo & yhi & zlo
+            else if (i == dlo.x-1 && j == dhi.y+1 && k == dlo.z-1 && (bflo[0] || bfhi[1] || bflo[2]))
+            {
+                if (bflo[0] && bfhi[1] && bflo[2])
+                {
+                    a(i,j,k) = a(i+1+offset, j-1-offset, k+1+offset);
+                }
+                else if (bflo[0] && bfhi[1])
+                {
+                    a(i,j,k) = a(i+1+offset, j-1-offset, k);
+                }
+                else if (bflo[0] && bflo[2])
+                {
+                    a(i,j,k) = a(i+1+offset, j, k+1+offset);
+                }
+                else if (bfhi[1] && bflo[2])
+                {
+                    a(i,j,k) = a(i, j-1-offset, k+1+offset);
+                }
+                else if (bflo[0])
+                {
+                    a(i,j,k) = a(i+1+offset, j, k);
+                }
+                else if (bfhi[1])
+                {
+                    a(i,j,k) = a(i, j-1-offset, k);
+                }
+                else if (bflo[2])
+                {
+                    a(i,j,k) = a(i, j, k+1+offset);
+                }
+            }
+            // xhi & yhi & zlo
+            else if (i == dhi.x+1 && j == dhi.y+1 && k == dlo.z-1 && (bfhi[0] || bfhi[1] || bflo[2]))
+            {
+                if (bfhi[0] && bfhi[1] && bflo[2])
+                {
+                    a(i,j,k) = a(i-1-offset, j-1-offset, k+1+offset);
+                }
+                else if (bfhi[0] && bfhi[1])
+                {
+                    a(i,j,k) = a(i-1-offset, j-1-offset, k);
+                }
+                else if (bfhi[0] && bflo[2])
+                {
+                    a(i,j,k) = a(i-1-offset, j, k+1+offset);
+                }
+                else if (bfhi[1] && bflo[2])
+                {
+                    a(i,j,k) = a(i, j-1-offset, k+1+offset);
+                }
+                else if (bfhi[0])
+                {
+                    a(i,j,k) = a(i-1-offset, j, k);
+                }
+                else if (bfhi[1])
+                {
+                    a(i,j,k) = a(i, j-1-offset, k);
+                }
+                else if (bflo[2])
+                {
+                    a(i,j,k) = a(i, j, k+1+offset);
+                }
+            }
+            // xlo & ylo & zhi
+            else if (i == dlo.x-1 && j == dlo.y-1 && k == dhi.z+1 && (bflo[0] || bflo[1] || bfhi[2]))
+            {
+                if (bflo[0] && bflo[1] && bfhi[2])
+                {
+                    a(i,j,k) = a(i+1+offset, j+1+offset, k-1-offset);
+                }
+                else if (bflo[0] && bflo[1])
+                {
+                    a(i,j,k) = a(i+1+offset, j+1+offset, k);
+                }
+                else if (bflo[0] && bfhi[2])
+                {
+                    a(i,j,k) = a(i+1+offset, j, k-1-offset);
+                }
+                else if (bflo[1] && bfhi[2])
+                {
+                    a(i,j,k) = a(i, j+1+offset, k-1-offset);
+                }
+                else if (bflo[0])
+                {
+                    a(i,j,k) = a(i+1+offset, j, k);
+                }
+                else if (bflo[1])
+                {
+                    a(i,j,k) = a(i, j+1+offset, k);
+                }
+                else if (bfhi[2])
+                {
+                    a(i,j,k) = a(i, j, k-1-offset);
+                }
+            }
+            // xhi & ylo & zhi
+            else if (i == dhi.x+1 && j == dlo.y-1 && k == dhi.z+1 && (bfhi[0] || bflo[1] || bfhi[2]))
+            {
+                if (bfhi[0] && bflo[1] && bfhi[2])
+                {
+                    a(i,j,k) = a(i-1-offset, j+1+offset, k-1-offset);
+                }
+                else if (bfhi[0] && bflo[1])
+                {
+                    a(i,j,k) = a(i-1-offset, j+1+offset, k);
+                }
+                else if (bfhi[0] && bfhi[2])
+                {
+                    a(i,j,k) = a(i-1-offset, j, k-1-offset);
+                }
+                else if (bflo[1] && bfhi[2])
+                {
+                    a(i,j,k) = a(i, j+1+offset, k-1-offset);
+                }
+                else if (bfhi[0])
+                {
+                    a(i,j,k) = a(i-1-offset, j, k);
+                }
+                else if (bflo[1])
+                {
+                    a(i,j,k) = a(i, j+1+offset, k);
+                }
+                else if (bfhi[2])
+                {
+                    a(i,j,k) = a(i, j, k-1-offset);
+                }
+            }
+            // xlo & yhi & zhi
+            else if (i == dlo.x-1 && j == dhi.y+1 && k == dhi.z+1 && (bflo[0] || bfhi[1] || bfhi[2]))
+            {
+                if (bflo[0] && bfhi[1] && bfhi[2])
+                {
+                    a(i,j,k) = a(i+1+offset, j-1-offset, k-1-offset);
+                }
+                else if (bflo[0] && bfhi[1])
+                {
+                    a(i,j,k) = a(i+1+offset, j-1-offset, k);
+                }
+                else if (bflo[0] && bfhi[2])
+                {
+                    a(i,j,k) = a(i+1+offset, j, k-1-offset);
+                }
+                else if (bfhi[1] && bfhi[2])
+                {
+                    a(i,j,k) = a(i, j-1-offset, k-1-offset);
+                }
+                else if (bflo[0])
+                {
+                    a(i,j,k) = a(i+1+offset, j, k);
+                }
+                else if (bfhi[1])
+                {
+                    a(i,j,k) = a(i, j-1-offset, k);
+                }
+                else if (bfhi[2])
+                {
+                    a(i,j,k) = a(i, j, k-1-offset);
+                }
+            }
+            // xhi & yhi & zhi
+            else if (i == dhi.x+1 && j == dhi.y+1 && k == dhi.z+1 && (bfhi[0] || bfhi[1] || bfhi[2]))
+            {
+                if (bfhi[0] && bfhi[1] && bfhi[2])
+                {
+                    a(i,j,k) = a(i-1-offset, j-1-offset, k-1-offset);
+                }
+                else if (bfhi[0] && bfhi[1])
+                {
+                    a(i,j,k) = a(i-1-offset, j-1-offset, k);
+                }
+                else if (bfhi[0] && bfhi[2])
+                {
+                    a(i,j,k) = a(i-1-offset, j, k-1-offset);
+                }
+                else if (bfhi[1] && bfhi[2])
+                {
+                    a(i,j,k) = a(i, j-1-offset, k-1-offset);
+                }
+                else if (bfhi[0])
+                {
+                    a(i,j,k) = a(i-1-offset, j, k);
+                }
+                else if (bfhi[1])
+                {
+                    a(i,j,k) = a(i, j-1-offset, k);
+                }
+                else if (bfhi[2])
+                {
+                    a(i,j,k) = a(i, j, k-1-offset);
+                }
+            }
+            // xlo & ylo
+            else if (i == dlo.x-1 && j == dlo.y-1 && (bflo[0] || bflo[1]))
+            {
+                if (bflo[0] && bflo[1])
+                {
+                    a(i,j,k) = a(i+1+offset, j+1+offset, k);
+                }
+                else if (bflo[0])
+                {
+                    a(i,j,k) = a(i+1+offset, j, k);
+                }
+                else if (bflo[1])
+                {
+                    a(i,j,k) = a(i, j+1+offset, k);
+                }
+            }
+            // xhi & ylo
+            else if (i == dhi.x+1 && j == dlo.y-1 && (bfhi[0] || bflo[1]))
+            {
+                if (bfhi[0] && bflo[1])
+                {
+                    a(i,j,k) = a(i-1-offset, j+1+offset, k);
+                }
+                else if (bfhi[0])
+                {
+                    a(i,j,k) = a(i-1-offset, j, k);
+                }
+                else if (bflo[1])
+                {
+                    a(i,j,k) = a(i, j+1+offset, k);
+                }
+            }
+            // xlo & yhi
+            else if (i == dlo.x-1 && j == dhi.y+1 && (bflo[0] || bfhi[1]))
+            {
+                if (bflo[0] && bfhi[1])
+                {
+                    a(i,j,k) = a(i+1+offset, j-1-offset, k);
+                }
+                else if (bflo[0])
+                {
+                    a(i,j,k) = a(i+1+offset, j, k);
+                }
+                else if (bfhi[1])
+                {
+                    a(i,j,k) = a(i, j-1-offset, k);
+                }
+            }
+            // xhi & yhi
+            else if (i == dhi.x+1 && j == dhi.y+1 && (bfhi[0] || bfhi[1]))
+            {
+                if (bfhi[0] && bfhi[1])
+                {
+                    a(i,j,k) = a(i-1-offset, j-1-offset, k);
+                }
+                else if (bfhi[0])
+                {
+                    a(i,j,k) = a(i-1-offset, j, k);
+                }
+                else if (bfhi[1])
+                {
+                    a(i,j,k) = a(i, j-1-offset, k);
+                }
+            }
+            // xlo & zlo
+            else if (i == dlo.x-1 && k == dlo.z-1 && (bflo[0] || bflo[2]))
+            {
+                if (bflo[0] && bflo[2])
+                {
+                    a(i,j,k) = a(i+1+offset, j, k+1+offset);
+                }
+                else if (bflo[0])
+                {
+                    a(i,j,k) = a(i+1+offset, j, k);
+                }
+                else if (bflo[2])
+                {
+                    a(i,j,k) = a(i, j, k+1+offset);
+                }
+            }
+            // xhi & zlo
+            else if (i == dhi.x+1 && k == dlo.z-1 && (bfhi[0] || bflo[2]))
+            {
+                if (bfhi[0] && bflo[2])
+                {
+                    a(i,j,k) = a(i-1-offset, j, k+1+offset);
+                }
+                else if (bfhi[0])
+                {
+                    a(i,j,k) = a(i-1-offset, j, k);
+                }
+                else if (bflo[2])
+                {
+                    a(i,j,k) = a(i, j, k+1+offset);
+                }
+            }
+            // xlo & zhi
+            else if (i == dlo.x-1 && k == dhi.z+1 && (bflo[0] || bfhi[2]))
+            {
+                if (bflo[0] && bfhi[2])
+                {
+                    a(i,j,k) = a(i+1+offset, j, k-1-offset);
+                }
+                else if (bflo[0])
+                {
+                    a(i,j,k) = a(i+1+offset, j, k);
+                }
+                else if (bfhi[2])
+                {
+                    a(i,j,k) = a(i, j, k-1-offset);
+                }
+            }
+            // xhi & zhi
+            else if (i == dhi.x+1 && k == dhi.z+1 && (bfhi[0] || bfhi[2]))
+            {
+                if (bfhi[0] && bfhi[2])
+                {
+                    a(i,j,k) = a(i-1-offset, j, k-1-offset);
+                }
+                else if (bfhi[0])
+                {
+                    a(i,j,k) = a(i-1-offset, j, k);
+                }
+                else if (bfhi[2])
+                {
+                    a(i,j,k) = a(i, j, k-1-offset);
+                }
+            }
+            // ylo & zlo
+            else if (j == dlo.y-1 && k == dlo.z-1 && (bflo[1] || bflo[2]))
+            {
+                if (bflo[1] && bflo[2])
+                {
+                    a(i,j,k) = a(i, j+1+offset, k+1+offset);
+                }
+                else if (bflo[1])
+                {
+                    a(i,j,k) = a(i, j+1+offset, k);
+                }
+                else if (bflo[2])
+                {
+                    a(i,j,k) = a(i, j, k+1+offset);
+                }
+            }
+            // yhi & zlo
+            else if (j == dhi.y+1 && k == dlo.z-1 && (bfhi[1] || bflo[2]))
+            {
+                if (bfhi[1] && bflo[2])
+                {
+                    a(i,j,k) = a(i, j-1-offset, k+1+offset);
+                }
+                else if (bfhi[1])
+                {
+                    a(i,j,k) = a(i, j-1-offset, k);
+                }
+                else if (bflo[2])
+                {
+                    a(i,j,k) = a(i, j, k+1+offset);
+                }
+            }
+            // ylo & zhi
+            else if (j == dlo.y-1 && k == dhi.z+1 && (bflo[1] || bfhi[2]))
+            {
+                if (bflo[1] && bfhi[2])
+                {
+                    a(i,j,k) = a(i, j+1+offset, k-1-offset);
+                }
+                else if (bflo[1])
+                {
+                    a(i,j,k) = a(i, j+1+offset, k);
+                }
+                else if (bfhi[2])
+                {
+                    a(i,j,k) = a(i, j, k-1-offset);
+                }
+            }
+            // yhi & zhi
+            else if (j == dhi.y+1 && k == dhi.z+1 && (bfhi[1] || bfhi[2]))
+            {
+                if (bfhi[1] && bfhi[2])
+                {
+                    a(i,j,k) = a(i, j-1-offset, k-1-offset);
+                }
+                else if (bfhi[1])
+                {
+                    a(i,j,k) = a(i, j-1-offset, k);
+                }
+                else if (bfhi[2])
+                {
+                    a(i,j,k) = a(i, j, k-1-offset);
+                }
+            }
+            else if (i == dlo.x-1 && bflo[0])
+            {
+                a(i,j,k) = a(i+1+offset, j, k);
+            }
+            else if (i == dhi.x+1 && bfhi[0])
+            {
+                a(i,j,k) = a(i-1-offset, j, k);
+            }
+            else if (j == dlo.y-1 && bflo[1])
+            {
+                a(i,j,k) = a(i, j+1+offset, k);
+            }
+            else if (j == dhi.y+1 && bfhi[1])
+            {
+                a(i,j,k) = a(i, j-1-offset, k);
+            }
+            else if (k == dlo.z-1 && bflo[2])
+            {
+                a(i,j,k) = a(i, j, k+1+offset);
+            }
+            else if (k == dhi.z+1 && bfhi[2])
+            {
+                a(i,j,k) = a(i, j, k-1-offset);
+            }
+        }
+    });
+}
+
+//
+// restriction
+//
+
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+void mlndlap_restriction (int i, int j, int k, Array4<Real> const& crse,
+                          Array4<Real const> const& fine, Array4<int const> const& msk) noexcept
+{
+    int ii = i*2;
+    int jj = j*2;
+    int kk = k*2;
+    if (msk(ii,jj,kk)) {
+        crse(i,j,k) = Real(0.0);
+    } else {
+        crse(i,j,k) = Real(1./64.)*(fine(ii-1,jj-1,kk-1)+fine(ii+1,jj-1,kk-1)
+                                   +fine(ii-1,jj+1,kk-1)+fine(ii+1,jj+1,kk-1)
+                                   +fine(ii-1,jj-1,kk+1)+fine(ii+1,jj-1,kk+1)
+                                   +fine(ii-1,jj+1,kk+1)+fine(ii+1,jj+1,kk+1))
+                    + Real(1./32.)*(fine(ii  ,jj-1,kk-1)+fine(ii  ,jj+1,kk-1)
+                                   +fine(ii  ,jj-1,kk+1)+fine(ii  ,jj+1,kk+1)
+                                   +fine(ii-1,jj  ,kk-1)+fine(ii+1,jj  ,kk-1)
+                                   +fine(ii-1,jj  ,kk+1)+fine(ii+1,jj  ,kk+1)
+                                   +fine(ii-1,jj-1,kk  )+fine(ii+1,jj-1,kk  )
+                                   +fine(ii-1,jj+1,kk  )+fine(ii+1,jj+1,kk  ))
+                    + Real(1./16.)*(fine(ii-1,jj,kk)+fine(ii+1,jj,kk)
+                                   +fine(ii,jj-1,kk)+fine(ii,jj+1,kk)
+                                   +fine(ii,jj,kk-1)+fine(ii,jj,kk+1))
+                      + Real(1./8.)*fine(ii,jj,kk);
+    }
+}
+
+template <int rr>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+void mlndlap_restriction (int i, int j, int k, Array4<Real> const& crse,
+                          Array4<Real const> const& fine, Array4<int const> const& msk,
+                          Box const& fdom,
+                          GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bclo,
+                          GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bchi) noexcept
+{
+    const int ii = i*rr;
+    const int jj = j*rr;
+    const int kk = k*rr;
+    if (msk(ii,jj,kk)) {
+        crse(i,j,k) = Real(0.0);
+    } else {
+        const auto ndlo = amrex::lbound(fdom);
+        const auto ndhi = amrex::ubound(fdom);
+        Real tmp = Real(0.0);
+        for (int koff = -rr+1; koff <= rr-1; ++koff) {
+            Real wz = rr - std::abs(koff);
+            for (int joff = -rr+1; joff <= rr-1; ++joff) {
+                Real wy = rr - std::abs(joff);
+                for (int ioff = -rr+1; ioff <= rr-1; ++ioff) {
+                    Real wx = rr - std::abs(ioff);
+                    int itmp = ii + ioff;
+                    int jtmp = jj + joff;
+                    int ktmp = kk + koff;
+                    if ((itmp < ndlo.x && (bclo[0] == LinOpBCType::Neumann ||
+                                           bclo[0] == LinOpBCType::inflow)) ||
+                        (itmp > ndhi.x && (bchi[0] == LinOpBCType::Neumann ||
+                                           bchi[0] == LinOpBCType::inflow))) {
+                        itmp = ii - ioff;
+                    }
+                    if ((jtmp < ndlo.y && (bclo[1] == LinOpBCType::Neumann ||
+                                           bclo[1] == LinOpBCType::inflow)) ||
+                        (jtmp > ndhi.y && (bchi[1] == LinOpBCType::Neumann ||
+                                           bchi[1] == LinOpBCType::inflow))) {
+                        jtmp = jj - joff;
+                    }
+                    if ((ktmp < ndlo.z && (bclo[2] == LinOpBCType::Neumann ||
+                                           bclo[2] == LinOpBCType::inflow)) ||
+                        (ktmp > ndhi.z && (bchi[2] == LinOpBCType::Neumann ||
+                                           bchi[2] == LinOpBCType::inflow))) {
+                        ktmp = kk - koff;
+                    }
+                    tmp += wx*wy*wz*fine(itmp,jtmp,ktmp);
+                }
+            }
+        }
+        crse(i,j,k) = tmp/Real(rr*rr*rr*rr*rr*rr);
+    }
+}
+
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+void mlndlap_semi_restriction (int i, int j, int k, Array4<Real> const& crse,
+                               Array4<Real const> const& fine, Array4<int const> const& msk, int idir) noexcept
+{
+    if (idir == 2)
+    {
+        int ii = i*2;
+        int jj = j*2;
+        int kk = k;
+        if (msk(ii,jj,kk)) {
+            crse(i,j,k) = Real(0.0);
+        } else { // use 2-D formula
+            crse(i,j,k) = Real(1./16.)*(fine(ii-1,jj-1,kk) + Real(2.)*fine(ii  ,jj-1,kk) +          fine(ii+1,jj-1,kk)
+                             + Real(2.)*fine(ii-1,jj  ,kk) + Real(4.)*fine(ii  ,jj  ,kk) + Real(2.)*fine(ii+1,jj  ,kk)
+                                      + fine(ii-1,jj+1,kk) + Real(2.)*fine(ii  ,jj+1,kk) +          fine(ii+1,jj+1,kk));
+        }
+    }
+    else if (idir == 1)
+    {
+        int ii = i*2;
+        int jj = j;
+        int kk = k*2;
+        if (msk(ii,jj,kk)) {
+            crse(i,j,k) = Real(0.0);
+        } else { // use 2-D formula
+            crse(i,j,k) = Real(1./16.)*(fine(ii-1,jj,kk-1) + Real(2.)*fine(ii  ,jj,kk-1) +          fine(ii+1,jj,kk-1)
+                             + Real(2.)*fine(ii-1,jj  ,kk) + Real(4.)*fine(ii  ,jj,kk  ) + Real(2.)*fine(ii+1,jj,kk  )
+                                      + fine(ii-1,jj,kk+1) + Real(2.)*fine(ii  ,jj,kk+1) +          fine(ii+1,jj,kk+1));
+        }
+    }
+    else if (idir == 0)
+    {
+        int ii = i;
+        int jj = j*2;
+        int kk = k*2;
+        if (msk(ii,jj,kk)) {
+            crse(i,j,k) = Real(0.0);
+        } else { // use 2-D formula
+            crse(i,j,k) = Real(1./16.)*(fine(ii,jj-1,kk-1) + Real(2.)*fine(ii  ,jj,kk-1) +          fine(ii,jj+1,kk-1)
+                             + Real(2.)*fine(ii,jj-1  ,kk) + Real(4.)*fine(ii  ,jj,kk  ) + Real(2.)*fine(ii,jj+1,kk  )
+                                      + fine(ii,jj-1,kk+1) + Real(2.)*fine(ii  ,jj,kk+1) +          fine(ii,jj+1,kk+1));
+        }
+    }
+    else
+    {
+        amrex::Abort("mlndlap_semi_restriction semi direction wrong semi-direction. ");
+    }
+}
+
+//
+// masks
+//
+
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+void mlndlap_set_nodal_mask (int i, int j, int k, Array4<int> const& nmsk,
+                             Array4<int const> const& cmsk) noexcept
+{
+    using namespace nodelap_detail;
+
+    int s = cmsk(i-1,j-1,k-1) + cmsk(i  ,j-1,k-1)
+        +   cmsk(i-1,j  ,k-1) + cmsk(i  ,j  ,k-1)
+        +   cmsk(i-1,j-1,k  ) + cmsk(i  ,j-1,k  )
+        +   cmsk(i-1,j  ,k  ) + cmsk(i  ,j  ,k  );
+    if (s == 8*crse_cell) {
+        nmsk(i,j,k) = crse_node;
+    }
+    else if (s == 8*fine_cell) {
+        nmsk(i,j,k) = fine_node;
+    } else {
+        nmsk(i,j,k) = crse_fine_node;
+    }
+}
+
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+void mlndlap_set_dirichlet_mask (Box const& bx, Array4<int> const& dmsk,
+                                 Array4<int const> const& omsk, Box const& dom,
+                                 GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bclo,
+                                 GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bchi) noexcept
+{
+    const auto lo = amrex::lbound(bx);
+    const auto hi = amrex::ubound(bx);
+    for (int k = lo.z; k <= hi.z; ++k) {
+    for (int j = lo.y; j <= hi.y; ++j) {
+    AMREX_PRAGMA_SIMD
+    for (int i = lo.x; i <= hi.x; ++i) {
+        if (!dmsk(i,j,k)) {
+            dmsk(i,j,k) = (omsk(i-1,j-1,k-1) == 1 || omsk(i,j-1,k-1) == 1 ||
+                           omsk(i-1,j  ,k-1) == 1 || omsk(i,j  ,k-1) == 1 ||
+                           omsk(i-1,j-1,k  ) == 1 || omsk(i,j-1,k  ) == 1 ||
+                           omsk(i-1,j  ,k  ) == 1 || omsk(i,j  ,k  ) == 1);
+        }
+    }}}
+
+    const auto domlo = amrex::lbound(dom);
+    const auto domhi = amrex::ubound(dom);
+
+    if (bclo[0] == LinOpBCType::Dirichlet && lo.x == domlo.x) {
+        for (int k = lo.z; k <= hi.z; ++k) {
+        for (int j = lo.y; j <= hi.y; ++j) {
+            dmsk(lo.x,j,k) = 1;
+        }}
+    }
+
+    if (bchi[0] == LinOpBCType::Dirichlet && hi.x == domhi.x) {
+        for (int k = lo.z; k <= hi.z; ++k) {
+        for (int j = lo.y; j <= hi.y; ++j) {
+            dmsk(hi.x,j,k) = 1;
+        }}
+    }
+
+    if (bclo[1] == LinOpBCType::Dirichlet && lo.y == domlo.y) {
+        for (int k = lo.z; k <= hi.z; ++k) {
+        AMREX_PRAGMA_SIMD
+        for (int i = lo.x; i <= hi.x; ++i) {
+            dmsk(i,lo.y,k) = 1;
+        }}
+    }
+
+    if (bchi[1] == LinOpBCType::Dirichlet && hi.y == domhi.y) {
+        for (int k = lo.z; k <= hi.z; ++k) {
+        AMREX_PRAGMA_SIMD
+        for (int i = lo.x; i <= hi.x; ++i) {
+            dmsk(i,hi.y,k) = 1;
+        }}
+    }
+
+    if (bclo[2] == LinOpBCType::Dirichlet && lo.z == domlo.z) {
+        for (int j = lo.y; j <= hi.y; ++j) {
+        AMREX_PRAGMA_SIMD
+        for (int i = lo.x; i <= hi.x; ++i) {
+            dmsk(i,j,lo.z) = 1;
+        }}
+    }
+
+    if (bchi[2] == LinOpBCType::Dirichlet && hi.z == domhi.z) {
+        for (int j = lo.y; j <= hi.y; ++j) {
+        AMREX_PRAGMA_SIMD
+        for (int i = lo.x; i <= hi.x; ++i) {
+            dmsk(i,j,hi.z) = 1;
+        }}
+    }
+}
+
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+void mlndlap_set_dot_mask (Box const& bx, Array4<Real> const& dmsk,
+                           Array4<int const> const& omsk, Box const& dom,
+                           GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bclo,
+                           GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bchi) noexcept
+{
+    const auto lo = amrex::lbound(bx);
+    const auto hi = amrex::ubound(bx);
+    for (int k = lo.z; k <= hi.z; ++k) {
+    for (int j = lo.y; j <= hi.y; ++j) {
+    AMREX_PRAGMA_SIMD
+    for (int i = lo.x; i <= hi.x; ++i) {
+        dmsk(i,j,k) = static_cast<Real>(omsk(i,j,k));
+    }}}
+
+    const auto domlo = amrex::lbound(dom);
+    const auto domhi = amrex::ubound(dom);
+
+    if ((bclo[0] == LinOpBCType::Neumann || bclo[0] == LinOpBCType::inflow)
+        && lo.x == domlo.x)
+    {
+        for (int k = lo.z; k <= hi.z; ++k) {
+        for (int j = lo.y; j <= hi.y; ++j) {
+            dmsk(lo.x,j,k) *= Real(0.5);
+        }}
+    }
+
+    if ((bchi[0] == LinOpBCType::Neumann || bchi[0] == LinOpBCType::inflow)
+        && hi.x == domhi.x)
+    {
+        for (int k = lo.z; k <= hi.z; ++k) {
+        for (int j = lo.y; j <= hi.y; ++j) {
+            dmsk(hi.x,j,k) *= Real(0.5);
+        }}
+    }
+
+    if ((bclo[1] == LinOpBCType::Neumann || bclo[1] == LinOpBCType::inflow)
+        && lo.y == domlo.y)
+    {
+        for (int k = lo.z; k <= hi.z; ++k) {
+        AMREX_PRAGMA_SIMD
+        for (int i = lo.x; i <= hi.x; ++i) {
+            dmsk(i,lo.y,k) *= Real(0.5);
+        }}
+    }
+
+    if ((bchi[1] == LinOpBCType::Neumann || bchi[1] == LinOpBCType::inflow)
+        && hi.y == domhi.y)
+    {
+        for (int k = lo.z; k <= hi.z; ++k) {
+        AMREX_PRAGMA_SIMD
+        for (int i = lo.x; i <= hi.x; ++i) {
+            dmsk(i,hi.y,k) *= Real(0.5);
+        }}
+    }
+
+    if ((bclo[2] == LinOpBCType::Neumann || bclo[2] == LinOpBCType::inflow)
+        && lo.z == domlo.z)
+    {
+        for (int j = lo.y; j <= hi.y; ++j) {
+        AMREX_PRAGMA_SIMD
+        for (int i = lo.x; i <= hi.x; ++i) {
+            dmsk(i,j,lo.z) *= Real(0.5);
+        }}
+    }
+
+    if ((bchi[2] == LinOpBCType::Neumann || bchi[2] == LinOpBCType::inflow)
+        && hi.z == domhi.z)
+    {
+        for (int j = lo.y; j <= hi.y; ++j) {
+        AMREX_PRAGMA_SIMD
+        for (int i = lo.x; i <= hi.x; ++i) {
+            dmsk(i,j,hi.z) *= Real(0.5);
+        }}
+    }
+}
+
+}
+
+#endif
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp_K.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp_K.H
new file mode 100644
index 00000000000..39797f32b83
--- /dev/null
+++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp_K.H
@@ -0,0 +1,61 @@
+#ifndef AMREX_ML_NODE_LINOP_K_H_
+#define AMREX_ML_NODE_LINOP_K_H_
+#include <AMReX_Config.H>
+
+#include <AMReX_FArrayBox.H>
+
+namespace amrex::nodelap_detail {
+    constexpr int crse_cell = 0; // Do NOT change the values
+    constexpr int fine_cell = 1;
+    constexpr int crse_node = 0;
+    constexpr int crse_fine_node = 1;
+    constexpr int fine_node = 2;
+}
+
+namespace amrex {
+
+template <typename T>
+void mlndlap_fillbc_cc (Box const& vbx, Array4<T> const& sigma, Box const& domain,
+                        GpuArray<LinOpBCType, AMREX_SPACEDIM> bclo,
+                        GpuArray<LinOpBCType, AMREX_SPACEDIM> bchi) noexcept
+{
+    GpuArray<bool,AMREX_SPACEDIM> bflo{{AMREX_D_DECL(bclo[0] != LinOpBCType::Periodic,
+                                                     bclo[1] != LinOpBCType::Periodic,
+                                                     bclo[2] != LinOpBCType::Periodic)}};
+    GpuArray<bool,AMREX_SPACEDIM> bfhi{{AMREX_D_DECL(bchi[0] != LinOpBCType::Periodic,
+                                                     bchi[1] != LinOpBCType::Periodic,
+                                                     bchi[2] != LinOpBCType::Periodic)}};
+    mlndlap_bc_doit(vbx, sigma, domain, bflo, bfhi);
+}
+
+template <typename T>
+void mlndlap_applybc (Box const& vbx, Array4<T> const& phi, Box const& domain,
+                      GpuArray<LinOpBCType, AMREX_SPACEDIM> bclo,
+                      GpuArray<LinOpBCType, AMREX_SPACEDIM> bchi) noexcept
+{
+    GpuArray<bool,AMREX_SPACEDIM> bflo{{AMREX_D_DECL(bclo[0] == LinOpBCType::Neumann ||
+                                                     bclo[0] == LinOpBCType::inflow,
+                                                     bclo[1] == LinOpBCType::Neumann ||
+                                                     bclo[1] == LinOpBCType::inflow,
+                                                     bclo[2] == LinOpBCType::Neumann ||
+                                                     bclo[2] == LinOpBCType::inflow)}};
+    GpuArray<bool,AMREX_SPACEDIM> bfhi{{AMREX_D_DECL(bchi[0] == LinOpBCType::Neumann ||
+                                                     bchi[0] == LinOpBCType::inflow,
+                                                     bchi[1] == LinOpBCType::Neumann ||
+                                                     bchi[1] == LinOpBCType::inflow,
+                                                     bchi[2] == LinOpBCType::Neumann ||
+                                                     bchi[2] == LinOpBCType::inflow)}};
+    mlndlap_bc_doit(vbx, phi, domain, bflo, bfhi);
+}
+
+}
+
+#if (AMREX_SPACEDIM == 1)
+#include <AMReX_MLNodeLinOp_1D_K.H>
+#elif (AMREX_SPACEDIM == 2)
+#include <AMReX_MLNodeLinOp_2D_K.H>
+#else
+#include <AMReX_MLNodeLinOp_3D_K.H>
+#endif
+
+#endif
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeTensorLaplacian.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeTensorLaplacian.cpp
index 59718d7624c..e8135aeba14 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLNodeTensorLaplacian.cpp
+++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeTensorLaplacian.cpp
@@ -1,5 +1,5 @@
 #include <AMReX_MLNodeTensorLaplacian.H>
-#include <AMReX_MLNodeLap_K.H>
+#include <AMReX_MLNodeLinOp_K.H>
 #include <AMReX_MLNodeTensorLap_K.H>
 #include <AMReX_MultiFabUtil.H>
 
diff --git a/Src/LinearSolvers/MLMG/Make.package b/Src/LinearSolvers/MLMG/Make.package
index 3609164c919..f54eaf3cf76 100644
--- a/Src/LinearSolvers/MLMG/Make.package
+++ b/Src/LinearSolvers/MLMG/Make.package
@@ -1,6 +1,9 @@
 ifndef AMREX_MLMG_MAKE
        AMREX_MLMG_MAKE := 1
 
+USE_LINEAR_SOLVERS_INCFLO ?= TRUE
+USE_LINEAR_SOLVERS_EM ?= TRUE
+
 CEXE_sources += AMReX_MLMG.cpp
 
 CEXE_headers   += AMReX_MLMG.H
@@ -16,7 +19,7 @@ CEXE_headers   += AMReX_MLLinOp_K.H
 
 CEXE_headers   += AMReX_MLCellLinOp.H
 
-CEXE_headers   += AMReX_MLNodeLinOp.H
+CEXE_headers   += AMReX_MLNodeLinOp.H AMReX_MLNodeLinOp_K.H AMReX_MLNodeLinOp_$(DIM)D_K.H
 CEXE_sources   += AMReX_MLNodeLinOp.cpp
 
 CEXE_headers   += AMReX_MLCellABecLap.H
@@ -39,60 +42,67 @@ ifeq ($(DIM),3)
 CEXE_headers   += AMReX_MLPoisson_2D_K.H
 endif
 
-CEXE_headers   += AMReX_MLNodeLaplacian.H
-CEXE_sources   += AMReX_MLNodeLaplacian.cpp
-CEXE_sources   += AMReX_MLNodeLaplacian_sync.cpp
-CEXE_sources   += AMReX_MLNodeLaplacian_sten.cpp
-CEXE_sources   += AMReX_MLNodeLaplacian_misc.cpp
-CEXE_headers   += AMReX_MLNodeLap_K.H AMReX_MLNodeLap_$(DIM)D_K.H
-ifeq ($(USE_EB),TRUE)
-  CEXE_sources   += AMReX_MLNodeLaplacian_eb.cpp
-endif
-ifeq ($(USE_HYPRE),TRUE)
-  CEXE_sources   += AMReX_MLNodeLaplacian_hypre.cpp
+ifneq ($(BL_NO_FORT),TRUE)
+  CEXE_headers += AMReX_MLLinOp_F.H
+  F90EXE_sources += AMReX_MLLinOp_nd.F90
 endif
 
-CEXE_headers   += AMReX_MLNodeABecLaplacian.H
-CEXE_sources   += AMReX_MLNodeABecLaplacian.cpp
-CEXE_headers   += AMReX_MLNodeABecLap_K.H AMReX_MLNodeABecLap_$(DIM)D_K.H
+ifneq ($(USE_LINEAR_SOLVERS_INCFLO),FALSE)
 
-CEXE_headers   += AMReX_MLNodeTensorLaplacian.H
-CEXE_sources   += AMReX_MLNodeTensorLaplacian.cpp
-CEXE_headers   += AMReX_MLNodeTensorLap_K.H AMReX_MLNodeTensorLap_$(DIM)D_K.H
+  CEXE_headers += AMReX_MLNodeABecLaplacian.H
+  CEXE_sources += AMReX_MLNodeABecLaplacian.cpp
+  CEXE_headers += AMReX_MLNodeABecLap_K.H AMReX_MLNodeABecLap_$(DIM)D_K.H
 
-CEXE_headers   += AMReX_MLTensorOp.H
-CEXE_sources   += AMReX_MLTensorOp.cpp AMReX_MLTensorOp_grad.cpp
-CEXE_headers   += AMReX_MLTensor_K.H AMReX_MLTensor_$(DIM)D_K.H
+  CEXE_headers += AMReX_MLNodeLaplacian.H
+  CEXE_sources += AMReX_MLNodeLaplacian.cpp
+  CEXE_sources += AMReX_MLNodeLaplacian_sync.cpp
+  CEXE_sources += AMReX_MLNodeLaplacian_sten.cpp
+  CEXE_sources += AMReX_MLNodeLaplacian_misc.cpp
+  CEXE_headers += AMReX_MLNodeLap_K.H AMReX_MLNodeLap_$(DIM)D_K.H
+ifeq ($(USE_EB),TRUE)
+  CEXE_sources += AMReX_MLNodeLaplacian_eb.cpp
+endif
+ifeq ($(USE_HYPRE),TRUE)
+  CEXE_sources += AMReX_MLNodeLaplacian_hypre.cpp
+endif
 
-CEXE_headers   += AMReX_MLEBNodeFDLaplacian.H
-CEXE_sources   += AMReX_MLEBNodeFDLaplacian.cpp
-CEXE_headers   += AMReX_MLEBNodeFDLap_K.H
-CEXE_headers   += AMReX_MLEBNodeFDLap_$(DIM)D_K.H
+  CEXE_headers += AMReX_MLTensorOp.H
+  CEXE_sources += AMReX_MLTensorOp.cpp AMReX_MLTensorOp_grad.cpp
+  CEXE_headers += AMReX_MLTensor_K.H AMReX_MLTensor_$(DIM)D_K.H
 
 ifeq ($(USE_EB),TRUE)
-CEXE_headers   += AMReX_MLEBABecLap.H
-CEXE_sources   += AMReX_MLEBABecLap.cpp
-CEXE_sources   += AMReX_MLEBABecLap_F.cpp
-CEXE_headers   += AMReX_MLEBABecLap_K.H
-CEXE_headers   += AMReX_MLEBABecLap_$(DIM)D_K.H
-
-CEXE_headers   += AMReX_MLEBTensorOp.H
-CEXE_sources   += AMReX_MLEBTensorOp.cpp
-CEXE_sources   += AMReX_MLEBTensorOp_bc.cpp
-CEXE_headers   += AMReX_MLEBTensor_K.H AMReX_MLEBTensor_$(DIM)D_K.H
+  CEXE_headers += AMReX_MLEBABecLap.H
+  CEXE_sources += AMReX_MLEBABecLap.cpp
+  CEXE_sources += AMReX_MLEBABecLap_F.cpp
+  CEXE_headers += AMReX_MLEBABecLap_K.H
+  CEXE_headers += AMReX_MLEBABecLap_$(DIM)D_K.H
+
+  CEXE_headers += AMReX_MLEBTensorOp.H
+  CEXE_sources += AMReX_MLEBTensorOp.cpp
+  CEXE_sources += AMReX_MLEBTensorOp_bc.cpp
+  CEXE_headers += AMReX_MLEBTensor_K.H AMReX_MLEBTensor_$(DIM)D_K.H
 endif
 
-ifneq ($(BL_NO_FORT),TRUE)
-  CEXE_headers += AMReX_MLLinOp_F.H
-  F90EXE_sources += AMReX_MLLinOp_nd.F90
-endif
+endif # ifneq ($(USE_LINEAR_SOLVERS_INCFLO),FALSE)
 
+ifneq ($(USE_LINEAR_SOLVERS_EM),FALSE)
 ifneq ($(DIM),1)
   CEXE_headers += AMReX_MLCurlCurl.H
   CEXE_sources += AMReX_MLCurlCurl.cpp
   CEXE_headers += AMReX_MLCurlCurl_K.H
 endif
 
+  CEXE_headers += AMReX_MLEBNodeFDLaplacian.H
+  CEXE_sources += AMReX_MLEBNodeFDLaplacian.cpp
+  CEXE_headers += AMReX_MLEBNodeFDLap_K.H
+  CEXE_headers += AMReX_MLEBNodeFDLap_$(DIM)D_K.H
+
+  CEXE_headers += AMReX_MLNodeTensorLaplacian.H
+  CEXE_sources += AMReX_MLNodeTensorLaplacian.cpp
+  CEXE_headers += AMReX_MLNodeTensorLap_K.H AMReX_MLNodeTensorLap_$(DIM)D_K.H
+
+endif # ifneq ($(USE_LINEAR_SOLVERS_EM),FALSE)
+
 VPATH_LOCATIONS += $(AMREX_HOME)/Src/LinearSolvers/MLMG
 INCLUDE_LOCATIONS += $(AMREX_HOME)/Src/LinearSolvers/MLMG
 
diff --git a/Tools/CMake/AMReXConfig.cmake.in b/Tools/CMake/AMReXConfig.cmake.in
index f5045b715cb..6ae09ba0a00 100644
--- a/Tools/CMake/AMReXConfig.cmake.in
+++ b/Tools/CMake/AMReXConfig.cmake.in
@@ -74,6 +74,8 @@ set(AMReX_AMRLEVEL_FOUND            @AMReX_AMRLEVEL@)
 set(AMReX_EB_FOUND                  @AMReX_EB@)
 set(AMReX_FINTERFACES_FOUND         @AMReX_FORTRAN_INTERFACES@)
 set(AMReX_LSOLVERS_FOUND            @AMReX_LINEAR_SOLVERS@)
+set(AMReX_LSOLVERS_INCFLO_FOUND     @AMReX_LINEAR_SOLVERS_INCFLO@)
+set(AMReX_LSOLVERS_EM_FOUND         @AMReX_LINEAR_SOLVERS_EM@)
 set(AMReX_AMRDATA_FOUND             @AMReX_AMRDATA@)
 set(AMReX_PARTICLES_FOUND           @AMReX_PARTICLES@)
 set(AMReX_P@AMReX_PARTICLES_PRECISION@_FOUND ON)
@@ -129,6 +131,8 @@ set(AMReX_AMRLEVEL                  @AMReX_AMRLEVEL@)
 set(AMReX_EB                        @AMReX_EB@)
 set(AMReX_FINTERFACES               @AMReX_FORTRAN_INTERFACES@)
 set(AMReX_LSOLVERS                  @AMReX_LINEAR_SOLVERS@)
+set(AMReX_LSOLVERS_INCFLO           @AMReX_LINEAR_SOLVERS_INCFLO@)
+set(AMReX_LSOLVERS_EM               @AMReX_LINEAR_SOLVERS_EM@)
 set(AMReX_AMRDATA                   @AMReX_AMRDATA@)
 set(AMReX_PARTICLES                 @AMReX_PARTICLES@)
 set(AMReX_PARTICLES_PRECISION       @AMReX_PARTICLES_PRECISION@)
diff --git a/Tools/CMake/AMReXOptions.cmake b/Tools/CMake/AMReXOptions.cmake
index 3e5d4c8bdb4..382459e38f1 100644
--- a/Tools/CMake/AMReXOptions.cmake
+++ b/Tools/CMake/AMReXOptions.cmake
@@ -284,6 +284,16 @@ print_option(AMReX_FORTRAN_INTERFACES)
 option( AMReX_LINEAR_SOLVERS  "Build AMReX Linear solvers" ON )
 print_option( AMReX_LINEAR_SOLVERS )
 
+cmake_dependent_option( AMReX_LINEAR_SOLVERS_INCFLO
+    "Build AMReX Linear solvers useful for incompressible flow codes" ON
+    "AMReX_LINEAR_SOLVERS" OFF)
+print_option( AMReX_LINEAR_SOLVERS_INCFLO )
+
+cmake_dependent_option( AMReX_LINEAR_SOLVERS_EM
+    "Build AMReX Linear solvers useful for electromagnetic codes" ON
+    "AMReX_LINEAR_SOLVERS" OFF)
+print_option( AMReX_LINEAR_SOLVERS_EM )
+
 option( AMReX_AMRDATA "Build data services" OFF )
 print_option( AMReX_AMRDATA )
 
diff --git a/Tools/libamrex/configure.py b/Tools/libamrex/configure.py
index 1545f86dfb2..8988c18b965 100755
--- a/Tools/libamrex/configure.py
+++ b/Tools/libamrex/configure.py
@@ -61,6 +61,14 @@ def configure(argv):
                         help="Enable AMReX linear solvers [default=yes]",
                         choices=["yes","no"],
                         default="yes")
+    parser.add_argument("--enable-linear-solver-incflo",
+                        help="Enable AMReX linear solvers for incompressible flow codes [default=yes]",
+                        choices=["yes","no"],
+                        default="yes")
+    parser.add_argument("--enable-linear-solver-em",
+                        help="Enable AMReX linear solvers for electromagnetic codes [default=yes]",
+                        choices=["yes","no"],
+                        default="yes")
     parser.add_argument("--enable-hypre",
                         help="Enable Hypre as an option for bottom solver of AMReX linear solvers [default=no]",
                         choices=["yes","no"],
@@ -144,6 +152,8 @@ def configure(argv):
     f.write("USE_PARTICLES = {}\n".format("FALSE" if args.enable_particle == "no" else "TRUE"))
     f.write("USE_FORTRAN_INTERFACE = {}\n".format("FALSE" if args.enable_fortran_api == "no" else "TRUE"))
     f.write("USE_LINEAR_SOLVERS = {}\n".format("FALSE" if args.enable_linear_solver == "no" else "TRUE"))
+    f.write("USE_LINEAR_SOLVERS_INCFLO = {}\n".format("FALSE" if args.enable_linear_solver_incflo == "no" else "TRUE"))
+    f.write("USE_LINEAR_SOLVERS_EM = {}\n".format("FALSE" if args.enable_linear_solver_em == "no" else "TRUE"))
     f.write("USE_HYPRE = {}\n".format("TRUE" if args.enable_hypre == "yes" else "FALSE"))
     f.write("USE_PETSC = {}\n".format("TRUE" if args.enable_petsc == "yes" else "FALSE"))
     f.write("USE_EB = {}\n".format("TRUE" if args.enable_eb == "yes" else "FALSE"))